User:AlefZet/crh/LanguageCrh.php/Temp

From Wikimedia Incubator
Jump to navigation Jump to search

/*

*/
<?php
/** Crimean Tatar (Qırımtatarca)
  * converter routines
  *
  * @package MediaWiki
  * @subpackage Language
  */

require_once( dirname(__FILE__).'/../LanguageConverter.php' );
require_once( dirname(__FILE__).'/LanguageCrh_latn.php' );

class CrhConverter extends LanguageConverter {
    /**
     * Latin -> Cyrillic replacement table.
     *
     * Every entry below is currently commented out, so the property is an
     * empty array. loadDefaultTables() assigns it to $this->mTables['crh-cyrl'];
     * the Latin -> Cyrillic work is done by the regex rules in preConversion().
     */
    var $mLatinToCyrillic = array(
/*       'İYA' => 'ИЯ', 'İya' => 'Ия', 'iya' => 'ия',
        'İYE' => 'ИЕ', 'İye' => 'Ие', 'iye' => 'ие',
        'EYE' => 'ЕЕ', 'Eye' => 'Ее', 'eye' => 'ее',
        'ÜYÜ' => 'УЮ', 'Üyü' => 'Ую', 'üyü' => 'ую',
        'YA' => 'Я',  'Ya' => 'Я',  'ya' => 'я',
        'YE' => 'Е',  'Ye' => 'Е',  'ye' => 'е',
        'YO' => 'Ё',  'Yo' => 'Ё',  'yo' => 'ё',
        'YÖ' => 'Ё',  'Yö' => 'Ё',  'yö' => 'ё',
        'YU' => 'Ю',  'Yu' => 'Ю',  'yu' => 'ю',
        'YÜ' => 'Ю',  'Yü' => 'Ю',  'yü' => 'ю',
        //'ŞÇ' => 'Щ',  'Şç' => 'Щ',  'şç' => 'щ',
        //'TS' => 'Ц',  'Ts' => 'Ц',  'ts' => 'ц',

        'a'  => 'а',  'â'  => 'я',  'b'  => 'б',  'c'  => 'дж', 'ç'  => 'ч',  'd'  => 'д',
        'e'  => 'е',  'f'  => 'ф',  'g'  => 'г',  'ğ'  => 'гъ', 'h'  => 'х',  'i'  => 'и', 
        'ı'  => 'ы',  'j'  => 'ж',  'k'  => 'к',  'l'  => 'л',  'm'  => 'м',  'n'  => 'н', 
        'ñ'  => 'нъ', 'o'  => 'о',  'ö'  => 'ё',  'p'  => 'п',  'q'  => 'къ', 'r'  => 'р',
        's'  => 'с',  'ş'  => 'ш',  't'  => 'т',  'u'  => 'у',  'ü'  => 'ю',  'v'  => 'в',  
        'w'  => 'в',  'x'  => 'кс', 'y'  => 'й',  'z'  => 'з',

        'A'  => 'А',  'Â'  => 'Я',  'B'  => 'Б',  'C'  => 'Дж', 'Ç'  => 'Ч',  'D'  => 'Д',  
        'E'  => 'Е',  'F'  => 'Ф',  'G'  => 'Г',  'Ğ'  => 'Гъ', 'H'  => 'Х',  'İ'  => 'И',
        'I'  => 'Ы',  'J'  => 'Ж',  'K'  => 'К',  'L'  => 'Л',  'M'  => 'М',  'N'  => 'Н',
        'Ñ'  => 'Нъ', 'O'  => 'О',  'Ö'  => 'Ё',  'P'  => 'П',  'Q'  => 'Къ', 'R'  => 'Р',
        'S'  => 'С',  'Ş'  => 'Ш',  'T'  => 'Т',  'U'  => 'У',  'Ü'  => 'Ю',  'V'  => 'В',
        'W'  => 'В',  'X'  => 'Кс', 'Y'  => 'Й',  'Z'  => 'З'
*/    );

    /**
     * Cyrillic -> Latin replacement table.
     *
     * Every entry below is currently commented out, so the property is an
     * empty array. loadDefaultTables() assigns it to $this->mTables['crh-latn'].
     * NOTE(review): in the disabled table 'э' maps to 'e' but 'Э' maps to 'É';
     * confirm which is intended before re-enabling it.
     */
    var $mCyrillicToLatin = array(
/*        'ГЪ' => 'Ğ',  'Гъ' => 'Ğ',  'гъ' => 'ğ',
        'КЪ' => 'Q',  'Къ' => 'Q',  'къ' => 'q',
        'НЪ' => 'Ñ',  'Нъ' => 'Ñ',  'нъ' => 'ñ',
        'ДЖ' => 'C',  'Дж' => 'C',  'дж' => 'c',
        'ЪЕ' => 'YE', 'ъе' => 'ye',
        'ЪЁ' => 'YO', 'ъё' => 'yo',
        'ЬЕ' => 'YE', 'ье' => 'ye',
        'ЬЁ' => 'YO', 'ьё' => 'yo',

        'а'  => 'a',  'б'  => 'b',  'в'  => 'v',  'г'  => 'g',  'д'  => 'd',  'е'  => 'e',
        'ё'  => 'ö',  'ж'  => 'j',  'з'  => 'z',  'и'  => 'i',  'й'  => 'y',  'к'  => 'k',
        'л'  => 'l',  'м'  => 'm',  'н'  => 'n',  'о'  => 'o',  'п'  => 'p',  'р'  => 'r',
        'с'  => 's',  'т'  => 't',  'у'  => 'u',  'ф'  => 'f',  'х'  => 'h',  'ц'  => 'ts',
        'ч'  => 'ç',  'ш'  => 'ş',  'щ'  => 'şç', 'ъ'  => '',   'ы'  => 'ı',  'ь'  => '',
        'э'  => 'e',  'ю'  => 'ü',  'я'  => 'â',

        'А'  => 'A',  'Б'  => 'B',  'В'  => 'V',  'Г'  => 'G',  'Д'  => 'D',  'Е'  => 'E',
        'Ё'  => 'Ö',  'Ж'  => 'J',  'З'  => 'Z',  'И'  => 'İ',  'Й'  => 'Y',  'К'  => 'K',
        'Л'  => 'L',  'М'  => 'M',  'Н'  => 'N',  'О'  => 'O',  'П'  => 'P',  'Р'  => 'R',
        'С'  => 'S',  'Т'  => 'T',  'У'  => 'U',  'Ф'  => 'F',  'Х'  => 'H',  'Ц'  => 'Ts',
        'Ч'  => 'Ç',  'Ш'  => 'Ş',  'Щ'  => 'Şç', 'Ъ'  => '',   'Ы'  => 'I',  'Ь'  => '',
        'Э'  => 'É',  'Ю'  => 'Ü',  'Я'  => 'Â',
*/    );

    /**
     * Populate $this->mTables with one replacement table per variant code.
     *
     * 'crh-latn' receives the Cyrillic -> Latin table, 'crh-cyrl' the
     * Latin -> Cyrillic table, and the base code 'crh' an empty table.
     */
    function loadDefaultTables() {
        //require( "includes/CrhConversion.php" );

        $this->mTables = array(
            'crh-latn' => $this->mCyrillicToLatin,
            'crh-cyrl' => $this->mLatinToCyrillic,
            'crh'      => array(),
        );
    }

    /*
     * Override of LanguageConverter::getPreferredVariant().
     *
     * Talk pages are never converted: when the current title is a talk page
     * the main language code is returned, otherwise the parent decides.
     */
    function getPreferredVariant(){
        global $wgUser, $wgRequest, $wgTitle;

        $onTalkPage = ( $wgTitle != NULL ) && $wgTitle->isTalkPage();

        return $onTalkPage
            ? $this->mMainLanguageCode
            : parent::getPreferredVariant();
    }

    /*
     * Wrapper around the parent's variant-link lookup: when the preferred
     * variant is the main language code, the link text is put back exactly
     * as the caller passed it in.
     */
    function findVariantLink( &$link, &$nt ) {
        $originalLink = $link;

        parent::findVariantLink( $link, $nt );

        $preferred = $this->getPreferredVariant();
        if ( $preferred == $this->mMainLanguageCode ) {
            $link = $originalLink;
        }
    }

    /*
     * We want our external link captions to be converted in variants,
     * so we return the original text instead of -{$text}-, except for URLs.
     *
     * Text that starts with http://, https://, ftp:// or irc:// is handed to
     * the parent to be marked as not convertible; anything else is returned
     * unchanged.
     */
    function markNoConversion($text) {
        // The schemes are grouped so that "^" anchors every alternative.
        // Ungrouped, only "https?://" was anchored and a caption that merely
        // contained "ftp://" or "irc://" somewhere was treated as a URL.
        if ( preg_match( '/^(?:https?|ftp|irc):\/\//', $text ) ) {
            return parent::markNoConversion( $text );
        }
        return $text;
    }

    /*
     * An ugly function wrapper for parsing Image titles
     * (to prevent image name conversion).
     *
     * When the current title is in the image namespace and $text starts with
     * "<namespace text>:", $text is returned untouched; everything else goes
     * through the parent's autoConvert().
     */
    function autoConvert($text, $toVariant=false) {
        global $wgTitle;

        // $wgTitle may be unset (getPreferredVariant() guards against that
        // as well), so do not dereference it blindly.
        if ( is_object( $wgTitle ) && $wgTitle->getNameSpace() == NS_IMAGE ) {
            // Quote the namespace text so that characters with a special
            // meaning in a regex (or the "/" delimiter) cannot break the match.
            $imagename = preg_quote( $wgTitle->getNsText(), '/' );
            if ( preg_match( '/^' . $imagename . ':/', $text ) ) {
                return $text;
            }
        }
        return parent::autoConvert( $text, $toVariant );
    }

	/**
	 * Convert a category sort key by running it through autoConvert()
	 * with 'crh-latn' as the target variant.
	 */
	function convertCategoryKey( $key ) {
		$latinKey = $this->autoConvert( $key, 'crh-latn' );
		return $latinKey;
	}

	/**
	 * Apply variant-specific regular-expression rewrites to $text. translate()
	 * calls this before it applies the replacement tables.
	 *
	 * Only the 'crh-cyrl' target (Latin -> Cyrillic) has rules. For 'crh'
	 * nothing needs to be done, the Cyrillic -> Latin rules for 'crh-latn' are
	 * still TODO, and any other variant code is passed through unchanged.
	 *
	 * The rules are order dependent: each one runs on the output of the
	 * previous one. None of the patterns uses the "u" modifier, so "\xNN"
	 * escapes and literal non-ASCII letters inside character classes are
	 * matched byte by byte.
	 *
	 * @param string $text      Text to rewrite.
	 * @param string $toVariant Target variant code.
	 * @return string The rewritten text ($text itself unless $toVariant is 'crh-cyrl').
	 */
	function preConversion( $text, $toVariant ) {
		// TODO: fill in the Cyrillic -> Latin regexes for 'crh-latn'.
		if ( $toVariant !== 'crh-cyrl' ) {
			return $text;
		}

		// TODO: move the rule table to a different file.
		//
		// UTF-8 byte escapes used for Latin letters with diacritics:
		//   A circumflex \xc3\x82    a circumflex \xc3\xa2
		//   C cedilla    \xc3\x87    c cedilla    \xc3\xa7
		//   G breve      \xc4\x9e    g breve      \xc4\x9f
		//   I dotted     \xc4\xb0    i dotless    \xc4\xb1
		//   N tilde      \xc3\x91    n tilde      \xc3\xb1
		//   O umlaut     \xc3\x96    o umlaut     \xc3\xb6
		//   S cedilla    \xc5\x9e    s cedilla    \xc5\x9f
		//   U umlaut     \xc3\x9c    u umlaut     \xc3\xbc

		// Shared pattern fragments (character-class contents or whole groups).
		$start     = '([\s"\(\-])';
		$tsCons    = 'bc\xc3\xa7dfghjklmnprs\xc5\x9ftvyzBC\xc3\x87DFGHJKLMNPRS\xc5\x9eTVYZ';
		$lower     = 'a\xc3\xa2bc\xc3\xa7defg\xc4\x9fh\xc4\xb1ijklmn\xc3\xb1o\xc3\xb6pqrs\xc5\x9ftu\xc3\xbcvyz';
		$upper     = 'A\xc3\x82BC\xc3\x87DEFG\xc4\x9eHI\xc4\xb0JKLMN\xc3\x91O\xc3\x96PQRS\xc5\x9eTU\xc3\x9cVYZ';
		$cons      = 'bc\xc3\xa7dfg\xc4\x9fhjklmn\xc3\xb1pqrstvyzBC\xc3\x87DFG\xc4\x9eFHJKLMN\xc3\x91PQRSTVYZ';
		$next      = '([\s"\.\,\:\)\-aAuU' . $cons . '])';
		$softP     = '([\xc3\xa7nprstz\xc3\x87NPRSTZ])';
		$soft      = '([\xc3\xa7nrstz\xc3\x87NRSTZ])';
		$softLower = '([\xc3\xa7nrstz])';
		$softUpper = '([\xc3\x87NRSTZ])';
		$initP     = '([bcgkmp\xc5\x9fBCGKMP\xc5\x9e])';
		$init      = '([bcgkm\xc5\x9fBCGKM\xc5\x9e])';

		// Each rule is array( pattern, replacement [, number of passes] ).
		// Rules with two passes consume the delimiter that follows the match,
		// so a second pass is needed to catch directly adjacent occurrences.
		$rules = array(
			// 0. Mini dictionary
			// 0.A Some uses of the letter "ц"
			// word-initial
			array( '/' . $start . 'ts/', "$1ц" ),
			array( '/' . $start . 'T[sS]/', "$1Ц" ),
			// word-final
			array( '/ts([\s"\.\,\:)-])/', "ц$1" ),
			array( '/T[sS]([\s"\.\,\:)-])/', "Ц$1" ),
			// -tsi-
			array( '/tsi([^z])/', "ци$1" ),
			array( '/T[sS][i\xc4\xb0]([^zZ])/', "ЦИ$1" ),
			// -tso-, -tsu-
			array( '/ts([ou])/', "ц$1" ),
			array( '/T[sS]([oOuU])/', "Ц$1" ),
			// ts followed by a consonant
			array( '/ts([' . $tsCons . '])/', "ц$1" ),
			array( '/T[sS]([' . $tsCons . '])/', "Ц$1" ),
			// ts preceded by a consonant
			array( '/([' . $tsCons . '])ts/', "$1ц" ),
			array( '/([' . $tsCons . '])T[sS]/', "$1Ц" ),

			// 1. Letters гъ, къ, нъ
			// NOTE(review): only the capital forms are handled here and in
			// section 9; lowercase g-breve, q, n-tilde and c are not touched by
			// any rule in this method — confirm whether that is intentional.
			array( '/\xc4\x9e([' . $lower . '])/', "Гъ$1" ),
			array( '/\xc4\x9e([' . $upper . '])/', "ГЪ$1" ),
			array( '/([' . $upper . '])\xc4\x9e/', "$1ГЪ" ),

			array( '/Q([' . $lower . '])/', "Къ$1" ),
			array( '/Q([' . $upper . '])/', "КЪ$1" ),
			array( '/([' . $upper . '])Q/', "$1КЪ" ),

			array( '/\xc3\x91([' . $lower . '])/', "Нъ$1" ),
			array( '/\xc3\x91([' . $upper . '])/', "НЪ$1" ),
			array( '/([' . $upper . '])\xc3\x91/', "$1НЪ" ),

			// 2. Insert a soft sign after "l"
			array( '/([ei\xc3\xb6\xc3\xbcE\xc4\xb0\xc3\x96\xc3\x9c])l([bc\xc3\xa7dfg\xc4\x9fhjklmn\xc3\xb1pqrstvyz\s"\.\,\:\)\-])/', "$1ль$2" ),
			array( '/([E\xc4\xb0\xc3\x96\xc3\x9c])L([bc\xc3\xa7dfg\xc4\x9fhjklmn\xc3\xb1pqrstvyzBC\xc3\x87DFG\xc4\x9eHJKLMN\xc3\x91PQRS\xc5\x9eTVYZ\s"\.\,\:\)\-])/', "$1ЛЬ$2" ),

			// 3. Handle "ya" and "ye"
			// ya
			array( '/([' . $cons . '])ya/', "$1ья" ),
			array( '/([' . $cons . '])Y[aA]/', "$1ЬЯ" ),
			array( '/ya/', "я" ),
			array( '/Y[aA]/', "Я" ),
			// ye
			array( '/([' . $cons . '])ye/', "$1ье" ),
			array( '/([' . $cons . '])Y[eE]/', "$1ЬЕ" ),
			array( '/ye/', "е" ),
			array( '/Y[eE]/', "Е" ),

			// 4. Place the letter "э"
			array( '/([ae\xc4\xb1io\xc3\xb6u\xc3\xbcаеэяAEI\xc4\xb0O\xc3\x96U\xc3\x9cАЕЭЯ\s"\(\-])e/', "$1э" ),
			array( '/([AEI\xc4\xb0O\xc3\x96U\xc3\x9cАЕЭЯ\s"\(\-])E/', "$1Э" ),

			// 5. Letters "ё" and "ю" - first pass
			// ё: insert soft signs after consonants
			array( '/' . $start . '([yY])\xc3\xb6' . $softP . $next . '/', "$1$2о$3ь$4" ),
			// NOTE(review): this replacement inserts a lowercase "у" where the
			// parallel lowercase rule above inserts "о" — looks like a typo, confirm.
			array( '/' . $start . '([yY])\xc3\x96' . $softP . $next . '/', "$1$2у$3Ь$4" ),
			// ё: insert soft signs before ё
			array( '/([' . $cons . '])y[o\xc3\xb6]/', "$1ьё" ),
			array( '/([' . $cons . '])Y[o\xc3\xb6O\xc3\x96]/', "$1ЬЁ" ),
			// ё: remaining occurrences
			array( '/y[o\xc3\xb6]/', "ё" ),
			array( '/Y[o\xc3\xb6O\xc3\x96]/', "Ё" ),
			array( '/[yY][o\xc3\xb6O\xc3\x96]/', "Ё" ),
			// ю: insert soft signs after consonants
			array( '/' . $start . '([yY])\xc3\xbc' . $softP . $next . '/', "$1$2\xc3\xbc$3ь$4", 2 ),
			array( '/' . $start . '([yY])\xc3\x9c' . $softP . $next . '/', "$1$2\xc3\x9c$3Ь$4", 2 ),
			// ю: insert soft signs before ю
			array( '/([' . $cons . '])y[u\xc3\xbc]/', "$1ью" ),
			array( '/([' . $cons . '])Y[u\xc3\xbcU\xc3\x9c]/', "$1ЬЮ" ),
			// ю: remaining occurrences
			array( '/y[u\xc3\xbc]/', "ю" ),
			array( '/Y[u\xc3\xbcU\xc3\x9c]/', "Ю" ),
			array( '/[yY][u\xc3\xbcU\xc3\x9c]/', "Ю" ),

			// 6. Handle bo'-, bu'- etc. in the first syllable
			// 6.1. cases where a soft sign is needed
			array( '/' . $start . $initP . '\xc3\xb6' . $soft . $next . '/', "$1$2о$3ь$4", 2 ),
			array( '/' . $start . $initP . '\xc3\x96' . $soft . $next . '/', "$1$2О$3Ь$4", 2 ),
			array( '/' . $start . $initP . '\xc3\xbc' . $soft . $next . '/', "$1$2у$3ь$4", 2 ),
			array( '/' . $start . $initP . '\xc3\x9c' . $soft . $next . '/', "$1$2У$3Ь$4", 2 ),
			// 6.2. cases where no soft sign is needed
			array( '/' . $start . $init . '\xc3\xb6/', "$1$2о" ),
			array( '/' . $start . $init . '\xc3\x96/', "$1$2О" ),
			array( '/' . $start . $init . '\xc3\xbc/', "$1$2у" ),
			array( '/' . $start . $init . '\xc3\x9c/', "$1$2У" ),

			// 7. Handle o' and u' at the start of a word
			// 7.1. cases where a soft sign is needed
			array( '/' . $start . '\xc3\xb6' . $soft . $next . '/', "$1о$2ь$3", 2 ),
			array( '/' . $start . '\xc3\x96' . $softLower . $next . '/', "$1О$2ь$3", 2 ),
			array( '/' . $start . '\xc3\x96' . $softUpper . $next . '/', "$1О$2Ь$3", 2 ),
			array( '/' . $start . '\xc3\xbc' . $soft . $next . '/', "$1у$2ь$3", 2 ),
			array( '/' . $start . '\xc3\x9c' . $softLower . $next . '/', "$1У$2ь$3", 2 ),
			array( '/' . $start . '\xc3\x9c' . $softUpper . $next . '/', "$1У$2Ь$3", 2 ),
			// 7.2. cases where no soft sign is needed
			array( '/' . $start . '\xc3\xb6/', "$1о" ),
			array( '/' . $start . '\xc3\x96/', "$1О" ),
			array( '/' . $start . '\xc3\xbc/', "$1у" ),
			array( '/' . $start . '\xc3\x9c/', "$1У" ),

			// 8. All remaining occurrences of a, e, o, o', u, u', y
			array( '/a/', "а" ),
			array( '/A/', "А" ),
			array( '/e/', "е" ),
			array( '/E/', "Е" ),
			array( '/o/', "о" ),
			array( '/O/', "О" ),
			array( '/\xc3\xb6/', "ё" ),
			array( '/\xc3\x96/', "Ё" ),
			array( '/u/', "у" ),
			array( '/U/', "У" ),
			array( '/\xc3\xbc/', "ю" ),
			array( '/\xc3\x9c/', "Ю" ),
			array( '/y/', "й" ),
			array( '/Y/', "Й" ),

			// 9. Letter "c" (capital C only, next to another letter)
			array( '/C([' . $lower . '\xd0\xb0\xd0\xb5\xd1\x91\xd0\xb9\xd0\xbe\xd1\x83\xd1\x8d\xd1\x8e\xd1\x8f])/', "Дж$1" ),
			array( '/C([' . $upper . 'АЕЁЙОУЭЮЯ])/', "ДЖ$1" ),
			array( '/([' . $upper . 'АЕЁЙОУЭЮЯ])C/', "$1ДЖ" ),

			// 10. All other letters
			// lowercase
			array( '/\xc3\xa2/', "я" ),
			array( '/b/', "б" ),
			array( '/\xc3\xa7/', "ч" ),
			array( '/d/', "д" ),
			array( '/f/', "ф" ),
			array( '/g/', "г" ),
			array( '/h/', "х" ),
			array( '/\xc4\xb1/', "ы" ),
			array( '/i/', "и" ),
			array( '/j/', "ж" ),
			array( '/k/', "к" ),
			array( '/l/', "л" ),
			array( '/m/', "м" ),
			array( '/n/', "н" ),
			array( '/p/', "п" ),
			array( '/r/', "р" ),
			array( '/s/', "с" ),
			array( '/\xc5\x9f/', "ш" ),
			array( '/t/', "т" ),
			array( '/v/', "в" ),
			array( '/z/', "з" ),
			// uppercase
			array( '/\xc3\x82/', "Я" ),
			array( '/B/', "Б" ),
			array( '/\xc3\x87/', "Ч" ),
			array( '/D/', "Д" ),
			array( '/F/', "Ф" ),
			array( '/G/', "Г" ),
			array( '/H/', "Х" ),
			array( '/I/', "Ы" ),
			array( '/\xc4\xb0/', "И" ),
			array( '/J/', "Ж" ),
			array( '/K/', "К" ),
			array( '/L/', "Л" ),
			array( '/M/', "М" ),
			array( '/N/', "Н" ),
			array( '/P/', "П" ),
			array( '/R/', "Р" ),
			array( '/S/', "С" ),
			array( '/\xc5\x9e/', "Ш" ),
			array( '/T/', "Т" ),
			array( '/V/', "В" ),
			array( '/Z/', "З" ),
		);

		// 11. Remove the soft sign from the clusters кьк, льл, ньн, рьр, сьс, тьт.
		// The mixed-case form uses an alternation rather than a character
		// class: without the "u" modifier a class matches a single byte and
		// could never match the two-byte soft sign.
		$doubled = array(
			array( 'к', 'К' ),
			array( 'л', 'Л' ),
			array( 'н', 'Н' ),
			array( 'р', 'Р' ),
			array( 'с', 'С' ),
			array( 'т', 'Т' ),
		);
		foreach ( $doubled as $pair ) {
			$lc = $pair[0];
			$uc = $pair[1];
			$rules[] = array( '/' . $lc . 'ь' . $lc . '/', $lc . $lc );
			$rules[] = array( '/' . $uc . '(?:ь|Ь)' . $lc . '/', $uc . $lc );
			$rules[] = array( '/' . $uc . 'Ь' . $uc . '/', $uc . $uc );
		}

		foreach ( $rules as $rule ) {
			$passes = isset( $rule[2] ) ? $rule[2] : 1;
			for ( $i = 0; $i < $passes; $i++ ) {
				$text = preg_replace( $rule[0], $rule[1], $text );
			}
		}

		return $text;
	}

	/**
	 * Translate text into the given variant.
	 *
	 * The text is first run through preConversion(). It is then split on
	 * Roman numerals (standing alone, or delimited by the start/end of the
	 * text or a non-word byte); the split-off numerals and their delimiters
	 * are copied through untouched and only the pieces in between are
	 * translated.
	 */
	function translate($text, $toVariant){
		$text = $this->preConversion( $text, $toVariant );

		$nonWord = '[^\w\x80-\xff]';
		// regexp for roman numbers
		$numeral = 'M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})';

		$alternatives = array(
			'^' . $numeral . '$',
			'^' . $numeral . $nonWord,
			$nonWord . $numeral . '$',
			$nonWord . $numeral . $nonWord,
		);
		$pattern = '/' . implode( '|', $alternatives ) . '/';

		$pieces = preg_split( $pattern, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );

		// The first piece goes straight through the variant's table.
		$first = array_shift( $pieces );
		$out = strtr( $first[0], $this->mTables[$toVariant] );
		$cursor = $first[1] + strlen( $first[0] );

		foreach ( $pieces as $piece ) {
			list( $chunk, $offset ) = $piece;
			// Copy the skipped delimiter verbatim, then translate the piece.
			$out .= substr( $text, $cursor, $offset - $cursor )
				. parent::translate( $chunk, $toVariant );
			$cursor = $offset + strlen( $chunk );
		}

		return $out;
	}

}

/**
 * Crimean Tatar language class with script variants.
 *
 * Builds on LanguageCrh_latn and attaches a CrhConverter that knows the
 * variants 'crh', 'crh-latn' and 'crh-cyrl'.
 */
class LanguageCrh extends LanguageCrh_latn {

	/**
	 * Create the converter, store it in $this->mConverter and register it
	 * for the 'ArticleSaveComplete' hook.
	 */
	function __construct() {
		global $wgHooks;
		parent::__construct();

		$codes = array( 'crh', 'crh-latn', 'crh-cyrl' );
		$fallbackMap = array(
			'crh'      => 'crh-latn',
			'crh-latn' => 'crh-latn',
			'crh-cyrl' => 'crh-cyrl'
		);

		$this->mConverter = new CrhConverter( $this, 'crh', $codes, $fallbackMap );
		$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
	}

	/**
	 * Apply a grammatical case to $word.
	 *
	 * The word is always converted to 'crh-latn' before the parent's
	 * convertGrammar() runs. If that conversion changed the word, the
	 * result is translated to 'crh-cyrl' afterwards.
	 */
	function convertGrammar( $word, $case ) {
		$profileKey = "LanguageCrh::convertGrammar";
		wfProfileIn( $profileKey );

		// always convert to -latn before convertGrammar
		$latinWord = $this->mConverter->autoConvert( $word, 'crh-latn' );
		$wasConverted = ( $word != $latinWord );

		$result = parent::convertGrammar( $latinWord, $case );

		// restore encoding
		if ( $wasConverted ) {
			$result = $this->mConverter->translate( $result, 'crh-cyrl' );
		}

		wfProfileOut( $profileKey );
		return $result;
	}

}

?>
/*

*/