commit d98b7d731555ae78ce7cdd7c7f4c8f8e302e71aa Author: Michael Razuvaev Date: Mon Dec 1 11:10:46 2008 +0000 Metrica: Каталоги относящиеся к исходникам перенесены в src/ diff --git a/libs/libcommon/README b/libs/libcommon/README new file mode 100644 index 00000000000..9fd4196e0f2 --- /dev/null +++ b/libs/libcommon/README @@ -0,0 +1 @@ +Common source code diff --git a/libs/libcommon/src/daemon.cpp b/libs/libcommon/src/daemon.cpp new file mode 100644 index 00000000000..59c52556e3f --- /dev/null +++ b/libs/libcommon/src/daemon.cpp @@ -0,0 +1,287 @@ + +#include + +#include +#include +#include +#include + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include + + +#include "revision.h" + +using Poco::Logger; +using Poco::AutoPtr; +using Poco::Observer; +using Poco::PatternFormatter; +using Poco::FormattingChannel; +using Poco::SplitterChannel; +using Poco::ConsoleChannel; +using Poco::FileChannel; +using Poco::Path; +using Poco::Message; +using Poco::Util::AbstractConfiguration; + +void Daemon::reloadConfiguration() +{ + + try + { + if( !config().hasProperty("config-file") ) throw Yandex::BException("config-file and pid-file should be specified"); + //config().remove(Poco::Util::Application::PRIO_DEFAULT); + loadConfiguration(config().getString("config-file")); + + // Перейдём в каталожек, чтобы нормально писать логи и коры + if( config().hasProperty("logger.log") && !config().hasProperty("logger.console") ) + { + std::string path = Yandex::mkdir( config().getString("logger.log") ); + if(config().getBool("application.runAsDaemon", false)) chdir(path.c_str()); + } + else + { + if(config().getBool("application.runAsDaemon", false)) chdir("/tmp"); + } + + buildLoggers(); + Logger *log = &Logger::get( "Daemon.reloadConfiguration()" ); + + // Виртуальная функция, чтобы пользователь мог обновить кеш конфигурации + try + { + refreshConfigCache(); + } + catch(const Poco::Exception& ex) + { + LOG_ERROR(log, "PoCo error while refresh config: " << ex.displayText()); + if( !is_Running ) throw; + } + catch (const std::exception& ex) + { + LOG_ERROR(log, "STD error while refresh config: " << ex.what()); + if( !is_Running ) throw; + } + catch (...) + { + LOG_ERROR(log, "UNKNOWN error while refresh config"); + if( !is_Running ) throw; + } + + } + catch(Poco::Exception& ex) + { + throw; + } + + // Если уже на работающем демоне, кто-то меняет конфиг так, что вываливается исключение - это его + // проблемы - будем всё равно работать + is_Running = true; +} + +/// Строит необходимые логгеры +void Daemon::buildLoggers() +{ + Poco::ScopedRWLock lock(*this); + + try + { + if( config().hasProperty("logger.log") && !config().hasProperty("logger.console") ) + { + std::cerr << "Should logs to " << config().getString("logger.log") << std::endl; + + // splitter + SplitterChannel *split = new SplitterChannel(); + + // format + std::string format("%Y.%m.%d %H:%M:%S [ %I ] <%p> %s: %t"); + + // set up two channel chains + PatternFormatter *pf = new PatternFormatter(format); + pf->setProperty("times", "local"); + FormattingChannel *log = new FormattingChannel(pf); + FileChannel *file = new FileChannel(); + file->setProperty("path", config().getString("logger.log")); + file->setProperty("rotation", config().getRawString("logger.size", "100M")); + file->setProperty("archive", "number"); + file->setProperty("purgeCount", config().getRawString("logger.count", "1")); + log->setChannel(file); + split->addChannel(log); + file->open(); + + if( config().hasProperty("logger.errorlog") ) + { + std::cerr << "Should error logs to " << config().getString("logger.errorlog") << std::endl; + //Poco::LevelFilterChannel *level = new Poco::LevelFilterChannel(); + //level->setLevel(Message::PRIO_NOTICE); + PatternFormatter *pf = new PatternFormatter(format); + pf->setProperty("times", "local"); + FormattingChannel *errorlog = new FormattingChannel(pf); + FileChannel *errorfile = new FileChannel(); + errorfile->setProperty("path", config().getString("logger.errorlog")); + errorfile->setProperty("rotation", config().getRawString("logger.size", "100M")); + errorfile->setProperty("archive", "number"); + errorfile->setProperty("purgeCount", config().getRawString("logger.count", "1")); + errorlog->setChannel(errorfile); + //level->setChannel(errorlog); + //split->addChannel(level); + errorlog->open(); + } + + split->open(); + logger().close(); + logger().setChannel( split ); + } + else + { + // Выводим на консоль + ConsoleChannel *file = new ConsoleChannel(); + logger().close(); + logger().setChannel( file ); + logger().warning("Log file isn't specified. Logging to console"); + } + } + catch(...) + { + // Выводим на консоль + ConsoleChannel *file = new ConsoleChannel(); + logger().close(); + logger().setChannel( file ); + logger().warning("Can't log to file. Logging to console"); + throw; + } + + // Уровни для всех + logger().setLevel( config().getString("logger.level", "information") ); + + // Прикрутим к корневому логгеру + Logger::root().setLevel( logger().getLevel() ); + Logger::root().setChannel( logger().getChannel() ); + + // Уровни для явно указанных логгеров + AbstractConfiguration::Keys levels; + config().keys("logger.levels", levels); + if( !levels.empty() ) + { + for(AbstractConfiguration::Keys::iterator it=levels.begin();it!=levels.end();++it) + { + Logger::get(*it).setLevel( config().getString("logger.levels." + *it, "info") ); + } + } +} + +void Daemon::initialize(Application& self) +{ + /// В случае падения - сохраняем коры + { + struct rlimit rlim; + int res = getrlimit(RLIMIT_CORE, &rlim); + assert(!res); + rlim.rlim_cur = RLIM_INFINITY; + res = setrlimit(RLIMIT_CORE, &rlim); + assert(!res); + } + + // Сбросим генератор случайных чисел + srandom(time(NULL)); + + // Используется при загрузке конфигурации + is_Running = false; + + p_TaskManager = new ("TaskManager") TaskManager(); + ServerApplication::initialize(self); + + // Создадим pid-file + if( !config().hasProperty("pid") ) throw Yandex::BException("config-file and pid-file should be specified"); + m_Pid.seed( config().getString("pid") ); + + // Считаем конфигурацию + reloadConfiguration(); + + // Выведем ревизию демона + Logger::root().information("Starting daemon with svn revision " + Yandex::to_string(SVN_REVISION)); +} + +void Daemon::uninitialize() +{ + ServerApplication::uninitialize(); + delete p_TaskManager; +} + +/// Заставляет демон завершаться, если хотя бы одна задача завершилась неудачно +void Daemon::exitOnTaskError() +{ + Observer obs(*this, &Daemon::handleNotification); + getTaskManager().addObserver(obs); +} + +/// Используется при exitOnTaskError() +void Daemon::handleNotification(TaskFailedNotification *_tfn) +{ + AutoPtr fn(_tfn); + Logger *lg = &(logger()); + LOG_ERROR(lg, "Task '" << fn->task()->name() << "' failed. Daemon is shutting down. Reason - " << fn->reason().displayText()); + + ServerApplication::terminate(); +} + +void Daemon::defineOptions(Poco::Util::OptionSet& _options) +{ + Poco::Util::ServerApplication::defineOptions (_options); + + _options.addOption( + Poco::Util::Option ("config-file", "C", "load configuration from a given file") + .required (false) + .repeatable (false) + .argument ("") + .binding("config-file") + ); + + _options.addOption( + Poco::Util::Option ("log-file", "L", "use given log file") + .required (false) + .repeatable (false) + .argument ("") + .binding("logger.log") + ); + + _options.addOption( + Poco::Util::Option ("errorlog-file", "E", "use given log file for errors only") + .required (false) + .repeatable (false) + .argument ("") + .binding("logger.errorlog") + ); + + _options.addOption( + Poco::Util::Option ("pid-file", "P", "use given pidfile") + .required (false) + .repeatable (false) + .argument ("") + .binding("pid") + ); + + _options.addOption( + Poco::Util::Option ("console", "", "print logs on console only") + .required (false) + .repeatable (false) + .binding("logger.console") + ); +} diff --git a/libs/libpocoext/README b/libs/libpocoext/README new file mode 100644 index 00000000000..ec8488d338c --- /dev/null +++ b/libs/libpocoext/README @@ -0,0 +1 @@ +Portable Components (PoCo) extensions diff --git a/libs/libpocoext/charsets/txts/CP1251Encoding.txt b/libs/libpocoext/charsets/txts/CP1251Encoding.txt new file mode 100644 index 00000000000..8aca8828ea4 --- /dev/null +++ b/libs/libpocoext/charsets/txts/CP1251Encoding.txt @@ -0,0 +1,279 @@ +# +# Name: cp1251_WinCyrillic to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell +# K.D. Chang +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp1251_WinCyrillic code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp1251_WinCyrillic order +# +canonical is a windows-1251 +alias is a cp1251 +alias is a cp-1251 +alias is a win + +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x0004 #END OF TRANSMISSION +0x05 0x0005 #ENQUIRY +0x06 0x0006 #ACKNOWLEDGE +0x07 0x0007 #BELL +0x08 0x0008 #BACKSPACE +0x09 0x0009 #HORIZONTAL TABULATION +0x0A 0x000A #LINE FEED +0x0B 0x000B #VERTICAL TABULATION +0x0C 0x000C #FORM FEED +0x0D 0x000D #CARRIAGE RETURN +0x0E 0x000E #SHIFT OUT +0x0F 0x000F #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x0014 #DEVICE CONTROL FOUR +0x15 0x0015 #NEGATIVE ACKNOWLEDGE +0x16 0x0016 #SYNCHRONOUS IDLE +0x17 0x0017 #END OF TRANSMISSION BLOCK +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1A 0x001A #SUBSTITUTE +0x1B 0x001B #ESCAPE +0x1C 0x001C #FILE SEPARATOR +0x1D 0x001D #GROUP SEPARATOR +0x1E 0x001E #RECORD SEPARATOR +0x1F 0x001F #UNIT SEPARATOR +0x20 0x0020 #SPACE +0x21 0x0021 #EXCLAMATION MARK +0x22 0x0022 #QUOTATION MARK +0x23 0x0023 #NUMBER SIGN +0x24 0x0024 #DOLLAR SIGN +0x25 0x0025 #PERCENT SIGN +0x26 0x0026 #AMPERSAND +0x27 0x0027 #APOSTROPHE +0x28 0x0028 #LEFT PARENTHESIS +0x29 0x0029 #RIGHT PARENTHESIS +0x2A 0x002A #ASTERISK +0x2B 0x002B #PLUS SIGN +0x2C 0x002C #COMMA +0x2D 0x002D #HYPHEN-MINUS +0x2E 0x002E #FULL STOP +0x2F 0x002F #SOLIDUS +0x30 0x0030 #DIGIT ZERO +0x31 0x0031 #DIGIT ONE +0x32 0x0032 #DIGIT TWO +0x33 0x0033 #DIGIT THREE +0x34 0x0034 #DIGIT FOUR +0x35 0x0035 #DIGIT FIVE +0x36 0x0036 #DIGIT SIX +0x37 0x0037 #DIGIT SEVEN +0x38 0x0038 #DIGIT EIGHT +0x39 0x0039 #DIGIT NINE +0x3A 0x003A #COLON +0x3B 0x003B #SEMICOLON +0x3C 0x003C #LESS-THAN SIGN +0x3D 0x003D #EQUALS SIGN +0x3E 0x003E #GREATER-THAN SIGN +0x3F 0x003F #QUESTION MARK +0x40 0x0040 #COMMERCIAL AT +0x41 0x0041 #LATIN CAPITAL LETTER A +0x42 0x0042 #LATIN CAPITAL LETTER B +0x43 0x0043 #LATIN CAPITAL LETTER C +0x44 0x0044 #LATIN CAPITAL LETTER D +0x45 0x0045 #LATIN CAPITAL LETTER E +0x46 0x0046 #LATIN CAPITAL LETTER F +0x47 0x0047 #LATIN CAPITAL LETTER G +0x48 0x0048 #LATIN CAPITAL LETTER H +0x49 0x0049 #LATIN CAPITAL LETTER I +0x4A 0x004A #LATIN CAPITAL LETTER J +0x4B 0x004B #LATIN CAPITAL LETTER K +0x4C 0x004C #LATIN CAPITAL LETTER L +0x4D 0x004D #LATIN CAPITAL LETTER M +0x4E 0x004E #LATIN CAPITAL LETTER N +0x4F 0x004F #LATIN CAPITAL LETTER O +0x50 0x0050 #LATIN CAPITAL LETTER P +0x51 0x0051 #LATIN CAPITAL LETTER Q +0x52 0x0052 #LATIN CAPITAL LETTER R +0x53 0x0053 #LATIN CAPITAL LETTER S +0x54 0x0054 #LATIN CAPITAL LETTER T +0x55 0x0055 #LATIN CAPITAL LETTER U +0x56 0x0056 #LATIN CAPITAL LETTER V +0x57 0x0057 #LATIN CAPITAL LETTER W +0x58 0x0058 #LATIN CAPITAL LETTER X +0x59 0x0059 #LATIN CAPITAL LETTER Y +0x5A 0x005A #LATIN CAPITAL LETTER Z +0x5B 0x005B #LEFT SQUARE BRACKET +0x5C 0x005C #REVERSE SOLIDUS +0x5D 0x005D #RIGHT SQUARE BRACKET +0x5E 0x005E #CIRCUMFLEX ACCENT +0x5F 0x005F #LOW LINE +0x60 0x0060 #GRAVE ACCENT +0x61 0x0061 #LATIN SMALL LETTER A +0x62 0x0062 #LATIN SMALL LETTER B +0x63 0x0063 #LATIN SMALL LETTER C +0x64 0x0064 #LATIN SMALL LETTER D +0x65 0x0065 #LATIN SMALL LETTER E +0x66 0x0066 #LATIN SMALL LETTER F +0x67 0x0067 #LATIN SMALL LETTER G +0x68 0x0068 #LATIN SMALL LETTER H +0x69 0x0069 #LATIN SMALL LETTER I +0x6A 0x006A #LATIN SMALL LETTER J +0x6B 0x006B #LATIN SMALL LETTER K +0x6C 0x006C #LATIN SMALL LETTER L +0x6D 0x006D #LATIN SMALL LETTER M +0x6E 0x006E #LATIN SMALL LETTER N +0x6F 0x006F #LATIN SMALL LETTER O +0x70 0x0070 #LATIN SMALL LETTER P +0x71 0x0071 #LATIN SMALL LETTER Q +0x72 0x0072 #LATIN SMALL LETTER R +0x73 0x0073 #LATIN SMALL LETTER S +0x74 0x0074 #LATIN SMALL LETTER T +0x75 0x0075 #LATIN SMALL LETTER U +0x76 0x0076 #LATIN SMALL LETTER V +0x77 0x0077 #LATIN SMALL LETTER W +0x78 0x0078 #LATIN SMALL LETTER X +0x79 0x0079 #LATIN SMALL LETTER Y +0x7A 0x007A #LATIN SMALL LETTER Z +0x7B 0x007B #LEFT CURLY BRACKET +0x7C 0x007C #VERTICAL LINE +0x7D 0x007D #RIGHT CURLY BRACKET +0x7E 0x007E #TILDE +0x7F 0x007F #DELETE +0x80 0x0402 #CYRILLIC CAPITAL LETTER DJE +0x81 0x0403 #CYRILLIC CAPITAL LETTER GJE +0x82 0x201A #SINGLE LOW-9 QUOTATION MARK +0x83 0x0453 #CYRILLIC SMALL LETTER GJE +0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK +0x85 0x2026 #HORIZONTAL ELLIPSIS +0x86 0x2020 #DAGGER +0x87 0x2021 #DOUBLE DAGGER +0x88 0x20AC #EURO +0x89 0x2030 #PER MILLE SIGN +0x8A 0x0409 #CYRILLIC CAPITAL LETTER LJE +0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8C 0x040A #CYRILLIC CAPITAL LETTER NJE +0x8D 0x040C #CYRILLIC CAPITAL LETTER KJE +0x8E 0x040B #CYRILLIC CAPITAL LETTER TSHE +0x8F 0x040F #CYRILLIC CAPITAL LETTER DZHE +0x90 0x0452 #CYRILLIC SMALL LETTER DJE +0x91 0x2018 #LEFT SINGLE QUOTATION MARK +0x92 0x2019 #RIGHT SINGLE QUOTATION MARK +0x93 0x201C #LEFT DOUBLE QUOTATION MARK +0x94 0x201D #RIGHT DOUBLE QUOTATION MARK +0x95 0x2022 #BULLET +0x96 0x2013 #EN DASH +0x97 0x2014 #EM DASH +0x98 #UNDEFINED +0x99 0x2122 #TRADE MARK SIGN +0x9A 0x0459 #CYRILLIC SMALL LETTER LJE +0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9C 0x045A #CYRILLIC SMALL LETTER NJE +0x9D 0x045C #CYRILLIC SMALL LETTER KJE +0x9E 0x045B #CYRILLIC SMALL LETTER TSHE +0x9F 0x045F #CYRILLIC SMALL LETTER DZHE +0xA0 0x00A0 #NO-BREAK SPACE +0xA1 0x040E #CYRILLIC CAPITAL LETTER SHORT U +0xA2 0x045E #CYRILLIC SMALL LETTER SHORT U +0xA3 0x0408 #CYRILLIC CAPITAL LETTER JE +0xA4 0x00A4 #CURRENCY SIGN +0xA5 0x0490 #CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0xA6 0x00A6 #BROKEN BAR +0xA7 0x00A7 #SECTION SIGN +0xA8 0x0401 #CYRILLIC CAPITAL LETTER IO +0xA9 0x00A9 #COPYRIGHT SIGN +0xAA 0x0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xAB 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC #NOT SIGN +0xAD 0x00AD #SOFT HYPHEN +0xAE 0x00AE #REGISTERED SIGN +0xAF 0x0407 #CYRILLIC CAPITAL LETTER YI +0xB0 0x00B0 #DEGREE SIGN +0xB1 0x00B1 #PLUS-MINUS SIGN +0xB2 0x0406 #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0xB3 0x0456 #CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0xB4 0x0491 #CYRILLIC SMALL LETTER GHE WITH UPTURN +0xB5 0x00B5 #MICRO SIGN +0xB6 0x00B6 #PILCROW SIGN +0xB7 0x00B7 #MIDDLE DOT +0xB8 0x0451 #CYRILLIC SMALL LETTER IO +0xB9 0x2116 #NUMERO SIGN +0xBA 0x0454 #CYRILLIC SMALL LETTER UKRAINIAN IE +0xBB 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x0458 #CYRILLIC SMALL LETTER JE +0xBD 0x0405 #CYRILLIC CAPITAL LETTER DZE +0xBE 0x0455 #CYRILLIC SMALL LETTER DZE +0xBF 0x0457 #CYRILLIC SMALL LETTER YI +0xC0 0x0410 #CYRILLIC CAPITAL LETTER A +0xC1 0x0411 #CYRILLIC CAPITAL LETTER BE +0xC2 0x0412 #CYRILLIC CAPITAL LETTER VE +0xC3 0x0413 #CYRILLIC CAPITAL LETTER GHE +0xC4 0x0414 #CYRILLIC CAPITAL LETTER DE +0xC5 0x0415 #CYRILLIC CAPITAL LETTER IE +0xC6 0x0416 #CYRILLIC CAPITAL LETTER ZHE +0xC7 0x0417 #CYRILLIC CAPITAL LETTER ZE +0xC8 0x0418 #CYRILLIC CAPITAL LETTER I +0xC9 0x0419 #CYRILLIC CAPITAL LETTER SHORT I +0xCA 0x041A #CYRILLIC CAPITAL LETTER KA +0xCB 0x041B #CYRILLIC CAPITAL LETTER EL +0xCC 0x041C #CYRILLIC CAPITAL LETTER EM +0xCD 0x041D #CYRILLIC CAPITAL LETTER EN +0xCE 0x041E #CYRILLIC CAPITAL LETTER O +0xCF 0x041F #CYRILLIC CAPITAL LETTER PE +0xD0 0x0420 #CYRILLIC CAPITAL LETTER ER +0xD1 0x0421 #CYRILLIC CAPITAL LETTER ES +0xD2 0x0422 #CYRILLIC CAPITAL LETTER TE +0xD3 0x0423 #CYRILLIC CAPITAL LETTER U +0xD4 0x0424 #CYRILLIC CAPITAL LETTER EF +0xD5 0x0425 #CYRILLIC CAPITAL LETTER HA +0xD6 0x0426 #CYRILLIC CAPITAL LETTER TSE +0xD7 0x0427 #CYRILLIC CAPITAL LETTER CHE +0xD8 0x0428 #CYRILLIC CAPITAL LETTER SHA +0xD9 0x0429 #CYRILLIC CAPITAL LETTER SHCHA +0xDA 0x042A #CYRILLIC CAPITAL LETTER HARD SIGN +0xDB 0x042B #CYRILLIC CAPITAL LETTER YERU +0xDC 0x042C #CYRILLIC CAPITAL LETTER SOFT SIGN +0xDD 0x042D #CYRILLIC CAPITAL LETTER E +0xDE 0x042E #CYRILLIC CAPITAL LETTER YU +0xDF 0x042F #CYRILLIC CAPITAL LETTER YA +0xE0 0x0430 #CYRILLIC SMALL LETTER A +0xE1 0x0431 #CYRILLIC SMALL LETTER BE +0xE2 0x0432 #CYRILLIC SMALL LETTER VE +0xE3 0x0433 #CYRILLIC SMALL LETTER GHE +0xE4 0x0434 #CYRILLIC SMALL LETTER DE +0xE5 0x0435 #CYRILLIC SMALL LETTER IE +0xE6 0x0436 #CYRILLIC SMALL LETTER ZHE +0xE7 0x0437 #CYRILLIC SMALL LETTER ZE +0xE8 0x0438 #CYRILLIC SMALL LETTER I +0xE9 0x0439 #CYRILLIC SMALL LETTER SHORT I +0xEA 0x043A #CYRILLIC SMALL LETTER KA +0xEB 0x043B #CYRILLIC SMALL LETTER EL +0xEC 0x043C #CYRILLIC SMALL LETTER EM +0xED 0x043D #CYRILLIC SMALL LETTER EN +0xEE 0x043E #CYRILLIC SMALL LETTER O +0xEF 0x043F #CYRILLIC SMALL LETTER PE +0xF0 0x0440 #CYRILLIC SMALL LETTER ER +0xF1 0x0441 #CYRILLIC SMALL LETTER ES +0xF2 0x0442 #CYRILLIC SMALL LETTER TE +0xF3 0x0443 #CYRILLIC SMALL LETTER U +0xF4 0x0444 #CYRILLIC SMALL LETTER EF +0xF5 0x0445 #CYRILLIC SMALL LETTER HA +0xF6 0x0446 #CYRILLIC SMALL LETTER TSE +0xF7 0x0447 #CYRILLIC SMALL LETTER CHE +0xF8 0x0448 #CYRILLIC SMALL LETTER SHA +0xF9 0x0449 #CYRILLIC SMALL LETTER SHCHA +0xFA 0x044A #CYRILLIC SMALL LETTER HARD SIGN +0xFB 0x044B #CYRILLIC SMALL LETTER YERU +0xFC 0x044C #CYRILLIC SMALL LETTER SOFT SIGN +0xFD 0x044D #CYRILLIC SMALL LETTER E +0xFE 0x044E #CYRILLIC SMALL LETTER YU +0xFF 0x044F #CYRILLIC SMALL LETTER YA + diff --git a/libs/libpocoext/charsets/txts/CP866Encoding.txt b/libs/libpocoext/charsets/txts/CP866Encoding.txt new file mode 100644 index 00000000000..8545b63fcc6 --- /dev/null +++ b/libs/libpocoext/charsets/txts/CP866Encoding.txt @@ -0,0 +1,277 @@ +# +# Name: cp866_DOSCyrillicRussian to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell +# K.D. Chang +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp866_DOSCyrillicRussian code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp866_DOSCyrillicRussian order +# +canonical is a cp866 +alias is a cp-866 + +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x0004 #END OF TRANSMISSION +0x05 0x0005 #ENQUIRY +0x06 0x0006 #ACKNOWLEDGE +0x07 0x0007 #BELL +0x08 0x0008 #BACKSPACE +0x09 0x0009 #HORIZONTAL TABULATION +0x0a 0x000a #LINE FEED +0x0b 0x000b #VERTICAL TABULATION +0x0c 0x000c #FORM FEED +0x0d 0x000d #CARRIAGE RETURN +0x0e 0x000e #SHIFT OUT +0x0f 0x000f #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x0014 #DEVICE CONTROL FOUR +0x15 0x0015 #NEGATIVE ACKNOWLEDGE +0x16 0x0016 #SYNCHRONOUS IDLE +0x17 0x0017 #END OF TRANSMISSION BLOCK +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1a 0x001a #SUBSTITUTE +0x1b 0x001b #ESCAPE +0x1c 0x001c #FILE SEPARATOR +0x1d 0x001d #GROUP SEPARATOR +0x1e 0x001e #RECORD SEPARATOR +0x1f 0x001f #UNIT SEPARATOR +0x20 0x0020 #SPACE +0x21 0x0021 #EXCLAMATION MARK +0x22 0x0022 #QUOTATION MARK +0x23 0x0023 #NUMBER SIGN +0x24 0x0024 #DOLLAR SIGN +0x25 0x0025 #PERCENT SIGN +0x26 0x0026 #AMPERSAND +0x27 0x0027 #APOSTROPHE +0x28 0x0028 #LEFT PARENTHESIS +0x29 0x0029 #RIGHT PARENTHESIS +0x2a 0x002a #ASTERISK +0x2b 0x002b #PLUS SIGN +0x2c 0x002c #COMMA +0x2d 0x002d #HYPHEN-MINUS +0x2e 0x002e #FULL STOP +0x2f 0x002f #SOLIDUS +0x30 0x0030 #DIGIT ZERO +0x31 0x0031 #DIGIT ONE +0x32 0x0032 #DIGIT TWO +0x33 0x0033 #DIGIT THREE +0x34 0x0034 #DIGIT FOUR +0x35 0x0035 #DIGIT FIVE +0x36 0x0036 #DIGIT SIX +0x37 0x0037 #DIGIT SEVEN +0x38 0x0038 #DIGIT EIGHT +0x39 0x0039 #DIGIT NINE +0x3a 0x003a #COLON +0x3b 0x003b #SEMICOLON +0x3c 0x003c #LESS-THAN SIGN +0x3d 0x003d #EQUALS SIGN +0x3e 0x003e #GREATER-THAN SIGN +0x3f 0x003f #QUESTION MARK +0x40 0x0040 #COMMERCIAL AT +0x41 0x0041 #LATIN CAPITAL LETTER A +0x42 0x0042 #LATIN CAPITAL LETTER B +0x43 0x0043 #LATIN CAPITAL LETTER C +0x44 0x0044 #LATIN CAPITAL LETTER D +0x45 0x0045 #LATIN CAPITAL LETTER E +0x46 0x0046 #LATIN CAPITAL LETTER F +0x47 0x0047 #LATIN CAPITAL LETTER G +0x48 0x0048 #LATIN CAPITAL LETTER H +0x49 0x0049 #LATIN CAPITAL LETTER I +0x4a 0x004a #LATIN CAPITAL LETTER J +0x4b 0x004b #LATIN CAPITAL LETTER K +0x4c 0x004c #LATIN CAPITAL LETTER L +0x4d 0x004d #LATIN CAPITAL LETTER M +0x4e 0x004e #LATIN CAPITAL LETTER N +0x4f 0x004f #LATIN CAPITAL LETTER O +0x50 0x0050 #LATIN CAPITAL LETTER P +0x51 0x0051 #LATIN CAPITAL LETTER Q +0x52 0x0052 #LATIN CAPITAL LETTER R +0x53 0x0053 #LATIN CAPITAL LETTER S +0x54 0x0054 #LATIN CAPITAL LETTER T +0x55 0x0055 #LATIN CAPITAL LETTER U +0x56 0x0056 #LATIN CAPITAL LETTER V +0x57 0x0057 #LATIN CAPITAL LETTER W +0x58 0x0058 #LATIN CAPITAL LETTER X +0x59 0x0059 #LATIN CAPITAL LETTER Y +0x5a 0x005a #LATIN CAPITAL LETTER Z +0x5b 0x005b #LEFT SQUARE BRACKET +0x5c 0x005c #REVERSE SOLIDUS +0x5d 0x005d #RIGHT SQUARE BRACKET +0x5e 0x005e #CIRCUMFLEX ACCENT +0x5f 0x005f #LOW LINE +0x60 0x0060 #GRAVE ACCENT +0x61 0x0061 #LATIN SMALL LETTER A +0x62 0x0062 #LATIN SMALL LETTER B +0x63 0x0063 #LATIN SMALL LETTER C +0x64 0x0064 #LATIN SMALL LETTER D +0x65 0x0065 #LATIN SMALL LETTER E +0x66 0x0066 #LATIN SMALL LETTER F +0x67 0x0067 #LATIN SMALL LETTER G +0x68 0x0068 #LATIN SMALL LETTER H +0x69 0x0069 #LATIN SMALL LETTER I +0x6a 0x006a #LATIN SMALL LETTER J +0x6b 0x006b #LATIN SMALL LETTER K +0x6c 0x006c #LATIN SMALL LETTER L +0x6d 0x006d #LATIN SMALL LETTER M +0x6e 0x006e #LATIN SMALL LETTER N +0x6f 0x006f #LATIN SMALL LETTER O +0x70 0x0070 #LATIN SMALL LETTER P +0x71 0x0071 #LATIN SMALL LETTER Q +0x72 0x0072 #LATIN SMALL LETTER R +0x73 0x0073 #LATIN SMALL LETTER S +0x74 0x0074 #LATIN SMALL LETTER T +0x75 0x0075 #LATIN SMALL LETTER U +0x76 0x0076 #LATIN SMALL LETTER V +0x77 0x0077 #LATIN SMALL LETTER W +0x78 0x0078 #LATIN SMALL LETTER X +0x79 0x0079 #LATIN SMALL LETTER Y +0x7a 0x007a #LATIN SMALL LETTER Z +0x7b 0x007b #LEFT CURLY BRACKET +0x7c 0x007c #VERTICAL LINE +0x7d 0x007d #RIGHT CURLY BRACKET +0x7e 0x007e #TILDE +0x7f 0x007f #DELETE +0x80 0x0410 #CYRILLIC CAPITAL LETTER A +0x81 0x0411 #CYRILLIC CAPITAL LETTER BE +0x82 0x0412 #CYRILLIC CAPITAL LETTER VE +0x83 0x0413 #CYRILLIC CAPITAL LETTER GHE +0x84 0x0414 #CYRILLIC CAPITAL LETTER DE +0x85 0x0415 #CYRILLIC CAPITAL LETTER IE +0x86 0x0416 #CYRILLIC CAPITAL LETTER ZHE +0x87 0x0417 #CYRILLIC CAPITAL LETTER ZE +0x88 0x0418 #CYRILLIC CAPITAL LETTER I +0x89 0x0419 #CYRILLIC CAPITAL LETTER SHORT I +0x8a 0x041a #CYRILLIC CAPITAL LETTER KA +0x8b 0x041b #CYRILLIC CAPITAL LETTER EL +0x8c 0x041c #CYRILLIC CAPITAL LETTER EM +0x8d 0x041d #CYRILLIC CAPITAL LETTER EN +0x8e 0x041e #CYRILLIC CAPITAL LETTER O +0x8f 0x041f #CYRILLIC CAPITAL LETTER PE +0x90 0x0420 #CYRILLIC CAPITAL LETTER ER +0x91 0x0421 #CYRILLIC CAPITAL LETTER ES +0x92 0x0422 #CYRILLIC CAPITAL LETTER TE +0x93 0x0423 #CYRILLIC CAPITAL LETTER U +0x94 0x0424 #CYRILLIC CAPITAL LETTER EF +0x95 0x0425 #CYRILLIC CAPITAL LETTER HA +0x96 0x0426 #CYRILLIC CAPITAL LETTER TSE +0x97 0x0427 #CYRILLIC CAPITAL LETTER CHE +0x98 0x0428 #CYRILLIC CAPITAL LETTER SHA +0x99 0x0429 #CYRILLIC CAPITAL LETTER SHCHA +0x9a 0x042a #CYRILLIC CAPITAL LETTER HARD SIGN +0x9b 0x042b #CYRILLIC CAPITAL LETTER YERU +0x9c 0x042c #CYRILLIC CAPITAL LETTER SOFT SIGN +0x9d 0x042d #CYRILLIC CAPITAL LETTER E +0x9e 0x042e #CYRILLIC CAPITAL LETTER YU +0x9f 0x042f #CYRILLIC CAPITAL LETTER YA +0xa0 0x0430 #CYRILLIC SMALL LETTER A +0xa1 0x0431 #CYRILLIC SMALL LETTER BE +0xa2 0x0432 #CYRILLIC SMALL LETTER VE +0xa3 0x0433 #CYRILLIC SMALL LETTER GHE +0xa4 0x0434 #CYRILLIC SMALL LETTER DE +0xa5 0x0435 #CYRILLIC SMALL LETTER IE +0xa6 0x0436 #CYRILLIC SMALL LETTER ZHE +0xa7 0x0437 #CYRILLIC SMALL LETTER ZE +0xa8 0x0438 #CYRILLIC SMALL LETTER I +0xa9 0x0439 #CYRILLIC SMALL LETTER SHORT I +0xaa 0x043a #CYRILLIC SMALL LETTER KA +0xab 0x043b #CYRILLIC SMALL LETTER EL +0xac 0x043c #CYRILLIC SMALL LETTER EM +0xad 0x043d #CYRILLIC SMALL LETTER EN +0xae 0x043e #CYRILLIC SMALL LETTER O +0xaf 0x043f #CYRILLIC SMALL LETTER PE +0xb0 0x2591 #LIGHT SHADE +0xb1 0x2592 #MEDIUM SHADE +0xb2 0x2593 #DARK SHADE +0xb3 0x2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 0x2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 0x2561 #BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xb6 0x2562 #BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xb7 0x2556 #BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xb8 0x2555 #BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xb9 0x2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba 0x2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb 0x2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc 0x255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd 0x255c #BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xbe 0x255b #BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xbf 0x2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 0x2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 0x2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 0x252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 0x251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 0x2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 0x253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 0x255e #BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xc7 0x255f #BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xc8 0x255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 0x2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca 0x2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb 0x2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc 0x2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd 0x2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce 0x256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf 0x2567 #BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xd0 0x2568 #BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xd1 0x2564 #BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xd2 0x2565 #BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xd3 0x2559 #BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xd4 0x2558 #BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xd5 0x2552 #BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xd6 0x2553 #BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xd7 0x256b #BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xd8 0x256a #BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xd9 0x2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda 0x250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb 0x2588 #FULL BLOCK +0xdc 0x2584 #LOWER HALF BLOCK +0xdd 0x258c #LEFT HALF BLOCK +0xde 0x2590 #RIGHT HALF BLOCK +0xdf 0x2580 #UPPER HALF BLOCK +0xe0 0x0440 #CYRILLIC SMALL LETTER ER +0xe1 0x0441 #CYRILLIC SMALL LETTER ES +0xe2 0x0442 #CYRILLIC SMALL LETTER TE +0xe3 0x0443 #CYRILLIC SMALL LETTER U +0xe4 0x0444 #CYRILLIC SMALL LETTER EF +0xe5 0x0445 #CYRILLIC SMALL LETTER HA +0xe6 0x0446 #CYRILLIC SMALL LETTER TSE +0xe7 0x0447 #CYRILLIC SMALL LETTER CHE +0xe8 0x0448 #CYRILLIC SMALL LETTER SHA +0xe9 0x0449 #CYRILLIC SMALL LETTER SHCHA +0xea 0x044a #CYRILLIC SMALL LETTER HARD SIGN +0xeb 0x044b #CYRILLIC SMALL LETTER YERU +0xec 0x044c #CYRILLIC SMALL LETTER SOFT SIGN +0xed 0x044d #CYRILLIC SMALL LETTER E +0xee 0x044e #CYRILLIC SMALL LETTER YU +0xef 0x044f #CYRILLIC SMALL LETTER YA +0xf0 0x0401 #CYRILLIC CAPITAL LETTER IO +0xf1 0x0451 #CYRILLIC SMALL LETTER IO +0xf2 0x0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xf3 0x0454 #CYRILLIC SMALL LETTER UKRAINIAN IE +0xf4 0x0407 #CYRILLIC CAPITAL LETTER YI +0xf5 0x0457 #CYRILLIC SMALL LETTER YI +0xf6 0x040e #CYRILLIC CAPITAL LETTER SHORT U +0xf7 0x045e #CYRILLIC SMALL LETTER SHORT U +0xf8 0x00b0 #DEGREE SIGN +0xf9 0x2219 #BULLET OPERATOR +0xfa 0x00b7 #MIDDLE DOT +0xfb 0x221a #SQUARE ROOT +0xfc 0x2116 #NUMERO SIGN +0xfd 0x00a4 #CURRENCY SIGN +0xfe 0x25a0 #BLACK SQUARE +0xff 0x00a0 #NO-BREAK SPACE + diff --git a/libs/libpocoext/charsets/txts/ISO8859_5Encoding.txt b/libs/libpocoext/charsets/txts/ISO8859_5Encoding.txt new file mode 100644 index 00000000000..b77e40bc052 --- /dev/null +++ b/libs/libpocoext/charsets/txts/ISO8859_5Encoding.txt @@ -0,0 +1,265 @@ +# +# Name: ISO 8859-5 (1988) to Unicode +# Unicode version: 1.1 +# Table version: 0.1 +# Table format: Format A +# Date: 16 January 1995 +# Authors: Tim Greenwood +# John H. Jenkins +# +# Copyright (c) 1991-1995 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on magnetic media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Recipient is granted the right to make copies in any form for +# internal distribution and to freely use the information supplied +# in the creation of products supporting Unicode. Unicode, Inc. +# specifically excludes the right to re-distribute this file directly +# to third parties or other organizations whether for profit or not. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO 8859-5 (1988) characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO 8859-5 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO 8859-5 order +# +# Any comments or problems, contact +# +canonical is a iso-8859-5 +alias is a iso8859-5 + +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x0004 #END OF TRANSMISSION +0x05 0x0005 #ENQUIRY +0x06 0x0006 #ACKNOWLEDGE +0x07 0x0007 #BELL +0x08 0x0008 #BACKSPACE +0x09 0x0009 #HORIZONTAL TABULATION +0x0a 0x000a #LINE FEED +0x0b 0x000b #VERTICAL TABULATION +0x0c 0x000c #FORM FEED +0x0d 0x000d #CARRIAGE RETURN +0x0e 0x000e #SHIFT OUT +0x0f 0x000f #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x0014 #DEVICE CONTROL FOUR +0x15 0x0015 #NEGATIVE ACKNOWLEDGE +0x16 0x0016 #SYNCHRONOUS IDLE +0x17 0x0017 #END OF TRANSMISSION BLOCK +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1a 0x001a #SUBSTITUTE +0x1b 0x001b #ESCAPE +0x1c 0x001c #FILE SEPARATOR +0x1d 0x001d #GROUP SEPARATOR +0x1e 0x001e #RECORD SEPARATOR +0x1f 0x001f #UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0401 # CYRILLIC CAPITAL LETTER IO +0xA2 0x0402 # CYRILLIC CAPITAL LETTER DJE +0xA3 0x0403 # CYRILLIC CAPITAL LETTER GJE +0xA4 0x0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xA5 0x0405 # CYRILLIC CAPITAL LETTER DZE +0xA6 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0xA7 0x0407 # CYRILLIC CAPITAL LETTER YI +0xA8 0x0408 # CYRILLIC CAPITAL LETTER JE +0xA9 0x0409 # CYRILLIC CAPITAL LETTER LJE +0xAA 0x040A # CYRILLIC CAPITAL LETTER NJE +0xAB 0x040B # CYRILLIC CAPITAL LETTER TSHE +0xAC 0x040C # CYRILLIC CAPITAL LETTER KJE +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x040E # CYRILLIC CAPITAL LETTER SHORT U +0xAF 0x040F # CYRILLIC CAPITAL LETTER DZHE +0xB0 0x0410 # CYRILLIC CAPITAL LETTER A +0xB1 0x0411 # CYRILLIC CAPITAL LETTER BE +0xB2 0x0412 # CYRILLIC CAPITAL LETTER VE +0xB3 0x0413 # CYRILLIC CAPITAL LETTER GHE +0xB4 0x0414 # CYRILLIC CAPITAL LETTER DE +0xB5 0x0415 # CYRILLIC CAPITAL LETTER IE +0xB6 0x0416 # CYRILLIC CAPITAL LETTER ZHE +0xB7 0x0417 # CYRILLIC CAPITAL LETTER ZE +0xB8 0x0418 # CYRILLIC CAPITAL LETTER I +0xB9 0x0419 # CYRILLIC CAPITAL LETTER SHORT I +0xBA 0x041A # CYRILLIC CAPITAL LETTER KA +0xBB 0x041B # CYRILLIC CAPITAL LETTER EL +0xBC 0x041C # CYRILLIC CAPITAL LETTER EM +0xBD 0x041D # CYRILLIC CAPITAL LETTER EN +0xBE 0x041E # CYRILLIC CAPITAL LETTER O +0xBF 0x041F # CYRILLIC CAPITAL LETTER PE +0xC0 0x0420 # CYRILLIC CAPITAL LETTER ER +0xC1 0x0421 # CYRILLIC CAPITAL LETTER ES +0xC2 0x0422 # CYRILLIC CAPITAL LETTER TE +0xC3 0x0423 # CYRILLIC CAPITAL LETTER U +0xC4 0x0424 # CYRILLIC CAPITAL LETTER EF +0xC5 0x0425 # CYRILLIC CAPITAL LETTER HA +0xC6 0x0426 # CYRILLIC CAPITAL LETTER TSE +0xC7 0x0427 # CYRILLIC CAPITAL LETTER CHE +0xC8 0x0428 # CYRILLIC CAPITAL LETTER SHA +0xC9 0x0429 # CYRILLIC CAPITAL LETTER SHCHA +0xCA 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN +0xCB 0x042B # CYRILLIC CAPITAL LETTER YERU +0xCC 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN +0xCD 0x042D # CYRILLIC CAPITAL LETTER E +0xCE 0x042E # CYRILLIC CAPITAL LETTER YU +0xCF 0x042F # CYRILLIC CAPITAL LETTER YA +0xD0 0x0430 # CYRILLIC SMALL LETTER A +0xD1 0x0431 # CYRILLIC SMALL LETTER BE +0xD2 0x0432 # CYRILLIC SMALL LETTER VE +0xD3 0x0433 # CYRILLIC SMALL LETTER GHE +0xD4 0x0434 # CYRILLIC SMALL LETTER DE +0xD5 0x0435 # CYRILLIC SMALL LETTER IE +0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE +0xD7 0x0437 # CYRILLIC SMALL LETTER ZE +0xD8 0x0438 # CYRILLIC SMALL LETTER I +0xD9 0x0439 # CYRILLIC SMALL LETTER SHORT I +0xDA 0x043A # CYRILLIC SMALL LETTER KA +0xDB 0x043B # CYRILLIC SMALL LETTER EL +0xDC 0x043C # CYRILLIC SMALL LETTER EM +0xDD 0x043D # CYRILLIC SMALL LETTER EN +0xDE 0x043E # CYRILLIC SMALL LETTER O +0xDF 0x043F # CYRILLIC SMALL LETTER PE +0xE0 0x0440 # CYRILLIC SMALL LETTER ER +0xE1 0x0441 # CYRILLIC SMALL LETTER ES +0xE2 0x0442 # CYRILLIC SMALL LETTER TE +0xE3 0x0443 # CYRILLIC SMALL LETTER U +0xE4 0x0444 # CYRILLIC SMALL LETTER EF +0xE5 0x0445 # CYRILLIC SMALL LETTER HA +0xE6 0x0446 # CYRILLIC SMALL LETTER TSE +0xE7 0x0447 # CYRILLIC SMALL LETTER CHE +0xE8 0x0448 # CYRILLIC SMALL LETTER SHA +0xE9 0x0449 # CYRILLIC SMALL LETTER SHCHA +0xEA 0x044A # CYRILLIC SMALL LETTER HARD SIGN +0xEB 0x044B # CYRILLIC SMALL LETTER YERU +0xEC 0x044C # CYRILLIC SMALL LETTER SOFT SIGN +0xED 0x044D # CYRILLIC SMALL LETTER E +0xEE 0x044E # CYRILLIC SMALL LETTER YU +0xEF 0x044F # CYRILLIC SMALL LETTER YA +0xF0 0x2116 # NUMERO SIGN +0xF1 0x0451 # CYRILLIC SMALL LETTER IO +0xF2 0x0452 # CYRILLIC SMALL LETTER DJE +0xF3 0x0453 # CYRILLIC SMALL LETTER GJE +0xF4 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE +0xF5 0x0455 # CYRILLIC SMALL LETTER DZE +0xF6 0x0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0xF7 0x0457 # CYRILLIC SMALL LETTER YI +0xF8 0x0458 # CYRILLIC SMALL LETTER JE +0xF9 0x0459 # CYRILLIC SMALL LETTER LJE +0xFA 0x045A # CYRILLIC SMALL LETTER NJE +0xFB 0x045B # CYRILLIC SMALL LETTER TSHE +0xFC 0x045C # CYRILLIC SMALL LETTER KJE +0xFD 0x00A7 # SECTION SIGN +0xFE 0x045E # CYRILLIC SMALL LETTER SHORT U +0xFF 0x045F # CYRILLIC SMALL LETTER DZHE diff --git a/libs/libpocoext/charsets/txts/KOI8REncoding.txt b/libs/libpocoext/charsets/txts/KOI8REncoding.txt new file mode 100644 index 00000000000..111904c2ca0 --- /dev/null +++ b/libs/libpocoext/charsets/txts/KOI8REncoding.txt @@ -0,0 +1,264 @@ +# +# Koi8 to unicode translation (from rfc1489) +# +canonical is a koi8-r +alias is a koi8r +alias is a koi8 +alias is a koi + +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0a 0x000a # LINE FEED +0x0b 0x000b # VERTICAL TABULATION +0x0c 0x000c # FORM FEED +0x0d 0x000d # CARRIAGE RETURN +0x0e 0x000e # SHIFT OUT +0x0f 0x000f # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1a 0x001a # SUBSTITUTE +0x1b 0x001b # ESCAPE +0x1c 0x001c # FILE SEPARATOR +0x1d 0x001d # GROUP SEPARATOR +0x1e 0x001e # RECORD SEPARATOR +0x1f 0x001f # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2a 0x002a # ASTERISK +0x2b 0x002b # PLUS SIGN +0x2c 0x002c # COMMA +0x2d 0x002d # HYPHEN-MINUS +0x2e 0x002e # FULL STOP +0x2f 0x002f # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3a 0x003a # COLON +0x3b 0x003b # SEMICOLON +0x3c 0x003c # LESS-THAN SIGN +0x3d 0x003d # EQUALS SIGN +0x3e 0x003e # GREATER-THAN SIGN +0x3f 0x003f # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4a 0x004a # LATIN CAPITAL LETTER J +0x4b 0x004b # LATIN CAPITAL LETTER K +0x4c 0x004c # LATIN CAPITAL LETTER L +0x4d 0x004d # LATIN CAPITAL LETTER M +0x4e 0x004e # LATIN CAPITAL LETTER N +0x4f 0x004f # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5a 0x005a # LATIN CAPITAL LETTER Z +0x5b 0x005b # LEFT SQUARE BRACKET +0x5c 0x005c # REVERSE SOLIDUS +0x5d 0x005d # RIGHT SQUARE BRACKET +0x5e 0x005e # CIRCUMFLEX ACCENT +0x5f 0x005f # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6a 0x006a # LATIN SMALL LETTER J +0x6b 0x006b # LATIN SMALL LETTER K +0x6c 0x006c # LATIN SMALL LETTER L +0x6d 0x006d # LATIN SMALL LETTER M +0x6e 0x006e # LATIN SMALL LETTER N +0x6f 0x006f # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7a 0x007a # LATIN SMALL LETTER Z +0x7b 0x007b # LEFT CURLY BRACKET +0x7c 0x007c # VERTICAL LINE +0x7d 0x007d # RIGHT CURLY BRACKET +0x7e 0x007e # TILDE +0x7f 0x007f # DELETE +0x80 0x2500 # BOX DRAWINGS LIGHT HORIZONTAL +0x81 0x2502 # BOX DRAWINGS LIGHT VERTICAL +0x82 0x250c # BOX DRAWINGS LIGHT DOWN AND RIGHT +0x83 0x2510 # BOX DRAWINGS LIGHT DOWN AND LEFT +0x84 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT +0x85 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT +0x86 0x251c # BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0x87 0x2524 # BOX DRAWINGS LIGHT VERTICAL AND LEFT +0x88 0x252c # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0x89 0x2534 # BOX DRAWINGS LIGHT UP AND HORIZONTAL +0x8a 0x253c # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0x8b 0x2580 # UPPER HALF BLOCK +0x8c 0x2584 # LOWER HALF BLOCK +0x8d 0x2588 # FULL BLOCK +0x8e 0x258c # LEFT HALF BLOCK +0x8f 0x2590 # RIGHT HALF BLOCK +0x90 0x2591 # LIGHT SHADE +0x91 0x2592 # MEDIUM SHADE +0x92 0x2593 # DARK SHADE +0x93 0x2320 # UPPER HALF OF INTEGRAL +0x94 0x25a0 # BLACK SQUARE +0x95 0x2219 # BULLET OPERATOR +0x96 0x221a # SQUARE ROOT +0x97 0x2248 # ALMOST EQUAL TO +0x98 0x2264 # LESS-THAN OR EQUAL TO +0x99 0x2265 # GREATER-THAN OR EQUAL TO +0x9a 0x00a0 # NO-BREAK SPACE +0x9b 0x2321 # LOWER HALF OF INTEGRAL +0x9c 0x00b0 # DEGREE SIGN +0x9d 0x00b2 # SUPERSCRIPT TWO +0x9e 0x00b7 # MIDDLE DOT +0x9f 0x00f7 # DIVISION SIGN +0xa0 0x2550 # BOX DRAWINGS DOUBLE HORIZONTAL +0xa1 0x2551 # BOX DRAWINGS DOUBLE VERTICAL +0xa2 0x2552 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xa3 0x0451 # CYRILLIC SMALL LETTER IO +0xa4 0x2553 # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xa5 0x2554 # BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xa6 0x2555 # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xa7 0x2556 # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xa8 0x2557 # BOX DRAWINGS DOUBLE DOWN AND LEFT +0xa9 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xaa 0x2559 # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xab 0x255a # BOX DRAWINGS DOUBLE UP AND RIGHT +0xac 0x255b # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xad 0x255c # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xae 0x255d # BOX DRAWINGS DOUBLE UP AND LEFT +0xaf 0x255e # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xb0 0x255f # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xb1 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xb2 0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xb3 0x0401 # CYRILLIC CAPITAL LETTER IO +0xb4 0x2562 # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xb5 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xb6 0x2564 # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xb7 0x2565 # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xb8 0x2566 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xb9 0x2567 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xba 0x2568 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xbb 0x2569 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xbc 0x256a # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xbd 0x256b # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xbe 0x256c # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xbf 0x00a9 # COPYRIGHT SIGN +0xc0 0x044e # CYRILLIC SMALL LETTER YU +0xc1 0x0430 # CYRILLIC SMALL LETTER A +0xc2 0x0431 # CYRILLIC SMALL LETTER BE +0xc3 0x0446 # CYRILLIC SMALL LETTER TSE +0xc4 0x0434 # CYRILLIC SMALL LETTER DE +0xc5 0x0435 # CYRILLIC SMALL LETTER IE +0xc6 0x0444 # CYRILLIC SMALL LETTER EF +0xc7 0x0433 # CYRILLIC SMALL LETTER GHE +0xc8 0x0445 # CYRILLIC SMALL LETTER HA +0xc9 0x0438 # CYRILLIC SMALL LETTER I +0xca 0x0439 # CYRILLIC SMALL LETTER SHORT I +0xcb 0x043a # CYRILLIC SMALL LETTER KA +0xcc 0x043b # CYRILLIC SMALL LETTER EL +0xcd 0x043c # CYRILLIC SMALL LETTER EM +0xce 0x043d # CYRILLIC SMALL LETTER EN +0xcf 0x043e # CYRILLIC SMALL LETTER O +0xd0 0x043f # CYRILLIC SMALL LETTER PE +0xd1 0x044f # CYRILLIC SMALL LETTER YA +0xd2 0x0440 # CYRILLIC SMALL LETTER ER +0xd3 0x0441 # CYRILLIC SMALL LETTER ES +0xd4 0x0442 # CYRILLIC SMALL LETTER TE +0xd5 0x0443 # CYRILLIC SMALL LETTER U +0xd6 0x0436 # CYRILLIC SMALL LETTER ZHE +0xd7 0x0432 # CYRILLIC SMALL LETTER VE +0xd8 0x044c # CYRILLIC SMALL LETTER SOFT SIGN +0xd9 0x044b # CYRILLIC SMALL LETTER YERU +0xda 0x0437 # CYRILLIC SMALL LETTER ZE +0xdb 0x0448 # CYRILLIC SMALL LETTER SHA +0xdc 0x044d # CYRILLIC SMALL LETTER E +0xdd 0x0449 # CYRILLIC SMALL LETTER SHCHA +0xde 0x0447 # CYRILLIC SMALL LETTER CHE +0xdf 0x044a # CYRILLIC SMALL LETTER HARD SIGN +0xe0 0x042e # CYRILLIC CAPITAL LETTER YU +0xe1 0x0410 # CYRILLIC CAPITAL LETTER A +0xe2 0x0411 # CYRILLIC CAPITAL LETTER BE +0xe3 0x0426 # CYRILLIC CAPITAL LETTER TSE +0xe4 0x0414 # CYRILLIC CAPITAL LETTER DE +0xe5 0x0415 # CYRILLIC CAPITAL LETTER IE +0xe6 0x0424 # CYRILLIC CAPITAL LETTER EF +0xe7 0x0413 # CYRILLIC CAPITAL LETTER GHE +0xe8 0x0425 # CYRILLIC CAPITAL LETTER HA +0xe9 0x0418 # CYRILLIC CAPITAL LETTER I +0xea 0x0419 # CYRILLIC CAPITAL LETTER SHORT I +0xeb 0x041a # CYRILLIC CAPITAL LETTER KA +0xec 0x041b # CYRILLIC CAPITAL LETTER EL +0xed 0x041c # CYRILLIC CAPITAL LETTER EM +0xee 0x041d # CYRILLIC CAPITAL LETTER EN +0xef 0x041e # CYRILLIC CAPITAL LETTER O +0xf0 0x041f # CYRILLIC CAPITAL LETTER PE +0xf1 0x042f # CYRILLIC CAPITAL LETTER YA +0xf2 0x0420 # CYRILLIC CAPITAL LETTER ER +0xf3 0x0421 # CYRILLIC CAPITAL LETTER ES +0xf4 0x0422 # CYRILLIC CAPITAL LETTER TE +0xf5 0x0423 # CYRILLIC CAPITAL LETTER U +0xf6 0x0416 # CYRILLIC CAPITAL LETTER ZHE +0xf7 0x0412 # CYRILLIC CAPITAL LETTER VE +0xf8 0x042c # CYRILLIC CAPITAL LETTER SOFT SIGN +0xf9 0x042b # CYRILLIC CAPITAL LETTER YERU +0xfa 0x0417 # CYRILLIC CAPITAL LETTER ZE +0xfb 0x0428 # CYRILLIC CAPITAL LETTER SHA +0xfc 0x042d # CYRILLIC CAPITAL LETTER E +0xfd 0x0429 # CYRILLIC CAPITAL LETTER SHCHA +0xfe 0x0427 # CYRILLIC CAPITAL LETTER CHE +0xff 0x042a # CYRILLIC CAPITAL LETTER HARD SIGN diff --git a/libs/libpocoext/charsets/txts/KOI8UEncoding.txt b/libs/libpocoext/charsets/txts/KOI8UEncoding.txt new file mode 100644 index 00000000000..4c38c947d0a --- /dev/null +++ b/libs/libpocoext/charsets/txts/KOI8UEncoding.txt @@ -0,0 +1,261 @@ +# +# koi8-u to unicode translation from rfc 2319 +# +canonical is a koi8-u +alias is a koi8u + +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0a 0x000a # LINE FEED +0x0b 0x000b # VERTICAL TABULATION +0x0c 0x000c # FORM FEED +0x0d 0x000d # CARRIAGE RETURN +0x0e 0x000e # SHIFT OUT +0x0f 0x000f # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1a 0x001a # SUBSTITUTE +0x1b 0x001b # ESCAPE +0x1c 0x001c # FILE SEPARATOR +0x1d 0x001d # GROUP SEPARATOR +0x1e 0x001e # RECORD SEPARATOR +0x1f 0x001f # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x80 0x2500 # BOX DRAWINGS LIGHT HORIZONTAL +0x81 0x2502 # BOX DRAWINGS LIGHT VERTICAL +0x82 0x250C # BOX DRAWINGS LIGHT DOWN AND RIGHT +0x83 0x2510 # BOX DRAWINGS LIGHT DOWN AND LEFT +0x84 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT +0x85 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT +0x86 0x251C # BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0x87 0x2524 # BOX DRAWINGS LIGHT VERTICAL AND LEFT +0x88 0x252C # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0x89 0x2534 # BOX DRAWINGS LIGHT UP AND HORIZONTAL +0x8A 0x253C # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0x8B 0x2580 # UPPER HALF BLOCK +0x8C 0x2584 # LOWER HALF BLOCK +0x8D 0x2588 # FULL BLOCK +0x8E 0x258C # LEFT HALF BLOCK +0x8F 0x2590 # RIGHT HALF BLOCK +0x90 0x2591 # LIGHT SHADE +0x91 0x2592 # MEDIUM SHADE +0x92 0x2593 # DARK SHADE +0x93 0x2320 # TOP HALF INTEGRAL +0x94 0x25A0 # BLACK SQUARE +0x95 0x2022 # BULLET +0x96 0x221A # SQUARE ROOT +0x97 0x2248 # ALMOST EQUAL TO +0x98 0x2264 # LESS-THAN OR EQUAL TO +0x99 0x2265 # GREATER-THAN OR EQUAL TO +0x9A 0x00A0 # NO-BREAK SPACE +0x9B 0x2321 # BOTTOM HALF INTEGRAL +0x9C 0x00B0 # DEGREE SIGN +0x9D 0x00B2 # SUPERSCRIPT TWO +0x9E 0x00B7 # MIDDLE DOT +0x9F 0x00F7 # DIVISION SIGN +0xA0 0x2550 # BOX DRAWINGS DOUBLE HORIZONTAL +0xA1 0x2551 # BOX DRAWINGS DOUBLE VERTICAL +0xA2 0x2552 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xA3 0x0451 # CYRILLIC SMALL LETTER IO +0xA4 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE +0xA5 0x2554 # BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xA6 0x0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0xA7 0x0457 # CYRILLIC SMALL LETTER YI +0xA8 0x2557 # BOX DRAWINGS DOUBLE DOWN AND LEFT +0xA9 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xAA 0x2559 # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xAB 0x255A # BOX DRAWINGS DOUBLE UP AND RIGHT +0xAC 0x255B # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xAD 0x0491 # CYRILLIC SMALL LETTER GHE WITH UPTURN +0xAE 0x255D # BOX DRAWINGS DOUBLE UP AND LEFT +0xAF 0x255E # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xB0 0x255F # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xB1 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xB2 0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xB3 0x0401 # CYRILLIC CAPITAL LETTER IO +0xB4 0x0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xB5 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xB6 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0xB7 0x0407 # CYRILLIC CAPITAL LETTER YI +0xB8 0x2566 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xB9 0x2567 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xBA 0x2568 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xBB 0x2569 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xBC 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xBD 0x0490 # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0xBE 0x256C # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xBF 0x00A9 # COPYRIGHT SIGN +0xC0 0x044E # CYRILLIC SMALL LETTER YU +0xC1 0x0430 # CYRILLIC SMALL LETTER A +0xC2 0x0431 # CYRILLIC SMALL LETTER BE +0xC3 0x0446 # CYRILLIC SMALL LETTER TSE +0xC4 0x0434 # CYRILLIC SMALL LETTER DE +0xC5 0x0435 # CYRILLIC SMALL LETTER IE +0xC6 0x0444 # CYRILLIC SMALL LETTER EF +0xC7 0x0433 # CYRILLIC SMALL LETTER GHE +0xC8 0x0445 # CYRILLIC SMALL LETTER HA +0xC9 0x0438 # CYRILLIC SMALL LETTER I +0xCA 0x0439 # CYRILLIC SMALL LETTER SHORT I +0xCB 0x043A # CYRILLIC SMALL LETTER KA +0xCC 0x043B # CYRILLIC SMALL LETTER EL +0xCD 0x043C # CYRILLIC SMALL LETTER EM +0xCE 0x043D # CYRILLIC SMALL LETTER EN +0xCF 0x043E # CYRILLIC SMALL LETTER O +0xD0 0x043F # CYRILLIC SMALL LETTER PE +0xD1 0x044F # CYRILLIC SMALL LETTER YA +0xD2 0x0440 # CYRILLIC SMALL LETTER ER +0xD3 0x0441 # CYRILLIC SMALL LETTER ES +0xD4 0x0442 # CYRILLIC SMALL LETTER TE +0xD5 0x0443 # CYRILLIC SMALL LETTER U +0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE +0xD7 0x0432 # CYRILLIC SMALL LETTER VE +0xD8 0x044C # CYRILLIC SMALL LETTER SOFT SIGN +0xD9 0x044B # CYRILLIC SMALL LETTER YERU +0xDA 0x0437 # CYRILLIC SMALL LETTER ZE +0xDB 0x0448 # CYRILLIC SMALL LETTER SHA +0xDC 0x044D # CYRILLIC SMALL LETTER E +0xDD 0x0449 # CYRILLIC SMALL LETTER SHCHA +0xDE 0x0447 # CYRILLIC SMALL LETTER CHE +0xDF 0x044A # CYRILLIC SMALL LETTER HARD SIGN +0xE0 0x042E # CYRILLIC CAPITAL LETTER YU +0xE1 0x0410 # CYRILLIC CAPITAL LETTER A +0xE2 0x0411 # CYRILLIC CAPITAL LETTER BE +0xE3 0x0426 # CYRILLIC CAPITAL LETTER TSE +0xE4 0x0414 # CYRILLIC CAPITAL LETTER DE +0xE5 0x0415 # CYRILLIC CAPITAL LETTER IE +0xE6 0x0424 # CYRILLIC CAPITAL LETTER EF +0xE7 0x0413 # CYRILLIC CAPITAL LETTER GHE +0xE8 0x0425 # CYRILLIC CAPITAL LETTER HA +0xE9 0x0418 # CYRILLIC CAPITAL LETTER I +0xEA 0x0419 # CYRILLIC CAPITAL LETTER SHORT I +0xEB 0x041A # CYRILLIC CAPITAL LETTER KA +0xEC 0x041B # CYRILLIC CAPITAL LETTER EL +0xED 0x041C # CYRILLIC CAPITAL LETTER EM +0xEE 0x041D # CYRILLIC CAPITAL LETTER EN +0xEF 0x041E # CYRILLIC CAPITAL LETTER O +0xF0 0x041F # CYRILLIC CAPITAL LETTER PE +0xF1 0x042F # CYRILLIC CAPITAL LETTER YA +0xF2 0x0420 # CYRILLIC CAPITAL LETTER ER +0xF3 0x0421 # CYRILLIC CAPITAL LETTER ES +0xF4 0x0422 # CYRILLIC CAPITAL LETTER TE +0xF5 0x0423 # CYRILLIC CAPITAL LETTER U +0xF6 0x0416 # CYRILLIC CAPITAL LETTER ZHE +0xF7 0x0412 # CYRILLIC CAPITAL LETTER VE +0xF8 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN +0xF9 0x042B # CYRILLIC CAPITAL LETTER YERU +0xFA 0x0417 # CYRILLIC CAPITAL LETTER ZE +0xFB 0x0428 # CYRILLIC CAPITAL LETTER SHA +0xFC 0x042D # CYRILLIC CAPITAL LETTER E +0xFD 0x0429 # CYRILLIC CAPITAL LETTER SHCHA +0xFE 0x0427 # CYRILLIC CAPITAL LETTER CHE +0xFF 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN diff --git a/libs/libpocoext/utils/make-charset.cpp b/libs/libpocoext/utils/make-charset.cpp new file mode 100644 index 00000000000..e40dcb3057a --- /dev/null +++ b/libs/libpocoext/utils/make-charset.cpp @@ -0,0 +1,408 @@ +/** + * @file + * @author Sergey N. Yatskevich + * @brief + */ +/* + * $Id$ + */ +#include +#include +#include +#include +#include +#include +#include + +class Encoding +{ + std::list m_names; + + int m_charset[256]; + unsigned char m_reverse_charset[256][256]; + + void buildNamesTable (std::ostream& _os, const std::string& _class_name) const; + void buildMap (std::ostream& _os, const std::string& _class_name) const; + void buildReverseMap (std::ostream& _os, const std::string& _class_name) const; + +public: + Encoding () + { + for (int i = 0; i < 256; ++i) + m_charset[i] = -1; + + memset (m_reverse_charset, 0, sizeof (m_reverse_charset)); + }; + + void read (std::istream& _is); + + void buildHead (std::ostream& _os, const std::string& _class_name) const; + void buildBody (std::ostream& _os, const std::string& _class_name) const; +}; + +void +Encoding::read (std::istream& _is) +{ + // Read stream and build charset and name list + while (_is.good ()) + { + std::string line; + std::getline (_is, line); + + char s[1024]; // string + int c; // 8-bit char + long int uc; // unicode (32-bit) char + + if (std::sscanf (line.c_str (), "%i %li", &c, &uc) == 2) + { + if ((c < 0) || (c > 255) || (uc < 0) || ((uc > 0xFEFE) && (uc != 0xFFFE))) + throw std::runtime_error ("Invalid charset file"); + + m_charset[c] = uc; + } + + else if (std::sscanf (line.c_str (), "canonical is a %s", s) == 1) + m_names.push_front (s); + + else if (std::sscanf (line.c_str (), "alias is a %s", s) == 1) + m_names.push_back (s); + } + + // Build reverse map + for (int i = 0; i < 256; ++i) + { + if (m_charset[i] >= 0) + m_reverse_charset[(m_charset[i] >> 8) & 0xff][m_charset[i] & 0xff] = i; + } +} + +void +Encoding::buildNamesTable (std::ostream& _os, const std::string& _class_name) const +{ + _os << "const char* Poco::" << _class_name << "::_names[] =\n" + "{\n"; + for (std::list::const_iterator n = m_names.begin (); n != m_names.end (); ++n) + _os << "\t\"" << *n << "\",\n"; + _os << "\tNULL\n" + "};\n\n\n"; +} + +void +Encoding::buildMap (std::ostream& _os, const std::string& _class_name) const +{ + _os << "const Poco::TextEncoding::CharacterMap Poco::" << _class_name << "::_map =\n" + "{\n"; + for (int i = 0; i < 256; ++i) + { + if ((i & 0x0f) == 0x00) + _os << '\t'; + + if (m_charset[i] >= 0) + { + _os << "0x" + << std::hex << std::setw (4) << std::setfill ('0') + << m_charset[i] << ", "; + } + else + { + _os + << std::dec << std::setw (6) << std::setfill (' ') + << m_charset[i] << ", "; + } + + if ((i & 0x0f) == 0x0f) + _os << std::endl; + } + _os << "};\n\n\n"; +} + +void +Encoding::buildReverseMap (std::ostream& _os, const std::string& _class_name) const +{ + for (int i = 0; i < 256; ++i) + { + int j = 0; + for (; j < 256; ++j) + { + if (m_reverse_charset[i][j] != 0) + break; + } + + // skip "empty" lines + if (j == 256) + continue; + + _os << "const unsigned char Poco::" << _class_name << "::_0x" + << std::hex << std::setw (4) << std::setfill ('0') + << (i << 8) << "_map[256] =\n{\n"; + + for (int j = 0; j < 256; ++j) + { + if ((j & 0x0f) == 0x00) + _os << '\t'; + + if (m_reverse_charset[i][j] > 0) + { + _os << "0x" + << std::hex << std::setw (2) << std::setfill ('0') + << (unsigned int)m_reverse_charset[i][j] << ", "; + } + else + { + _os + << std::dec << std::setw (4) << std::setfill (' ') + << (unsigned int)m_reverse_charset[i][j] << ", "; + } + + if ((j & 0x0f) == 0x0f) + _os << std::endl; + } + _os << "};\n\n\n"; + } +} + +void +Encoding::buildHead (std::ostream& _os, const std::string& _class_name) const +{ + _os << + "// \n" + "// " << _class_name << ".h\n" + "// \n" + "// $Id$\n" + "// \n" + "// Library: Foundation\n" + "// Package: Text\n" + "// Module: " << _class_name << "\n" + "// \n" + "// Definition of the " << _class_name << " class.\n" + "// \n" + "// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.\n" + "// and Contributors.\n" + "// \n" + "// Permission is hereby granted, free of charge, to any person or organization\n" + "// obtaining a copy of the software and accompanying documentation covered by\n" + "// this license (the \"Software\") to use, reproduce, display, distribute,\n" + "// execute, and transmit the Software, and to prepare derivative works of the\n" + "// Software, and to permit third-parties to whom the Software is furnished to\n" + "// do so, all subject to the following:\n" + "// \n" + "// The copyright notices in the Software and this entire statement, including\n" + "// the above license grant, this restriction and the following disclaimer,\n" + "// must be included in all copies of the Software, in whole or in part, and\n" + "// all derivative works of the Software, unless such copies or derivative\n" + "// works are solely in the form of machine-executable object code generated by\n" + "// a source language processor.\n" + "// \n" + "// THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" + "// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" + "// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT\n" + "// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE\n" + "// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,\n" + "// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n" + "// DEALINGS IN THE SOFTWARE.\n" + "//\n" + "// This file is generated automatically. Do not edit it.\n" + "//\n\n\n" + "#ifndef Foundation_" << _class_name << "_INCLUDED\n" + "#define Foundation_" << _class_name << "_INCLUDED\n\n\n"; + + _os << "#include \n"; + _os << "#include \n"; + _os << "#include \n\n\n"; + + _os << "namespace Poco\n{\n"; + + _os << "\tclass Foundation_API " << _class_name << " : public Poco::TextEncoding\n"; + _os << "\t{\n"; + _os << "\t\tstatic const char* _names[];\n"; + _os << "\t\tstatic const CharacterMap _map;\n"; + + for (int i = 0; i < 256; ++i) + { + int j = 0; + for (; j < 256; ++j) + { + if (m_reverse_charset[i][j] != 0) + break; + } + + // skip empty lines + if (j == 256) + continue; + + _os << "\t\tstatic const unsigned char _0x" + << std::hex << std::setw (4) << std::setfill ('0') << (i << 8) + << "_map[256];\n"; + } + + _os << "\n\tpublic:\n" + "\t\t" << _class_name << "();\n" + "\t\t~" << _class_name << "();\n\n"; + _os << "\t\tconst char* canonicalName() const;\n"; + _os << "\t\tbool isA(const std::string& encodingName) const;\n"; + _os << "\t\tconst CharacterMap& characterMap() const;\n"; + _os << "\t\tint convert(const unsigned char* bytes) const;\n"; + _os << "\t\tint convert(int ch, unsigned char* bytes, int length) const;\n"; + _os << "\t};\n"; + _os << "}\n\n"; + + _os << "#endif // Foundation_" << _class_name << "_INCLUDED\n"; +} + +void +Encoding::buildBody (std::ostream& _os, const std::string& _class_name) const +{ + _os << + "// \n" + "// " << _class_name << ".cpp\n" + "// \n" + "// $Id$\n" + "// \n" + "// Library: Foundation\n" + "// Package: Text\n" + "// Module: " << _class_name << "\n" + "// \n" + "// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.\n" + "// and Contributors.\n" + "// \n" + "// Permission is hereby granted, free of charge, to any person or organization\n" + "// obtaining a copy of the software and accompanying documentation covered by\n" + "// this license (the \"Software\") to use, reproduce, display, distribute,\n" + "// execute, and transmit the Software, and to prepare derivative works of the\n" + "// Software, and to permit third-parties to whom the Software is furnished to\n" + "// do so, all subject to the following:\n" + "// \n" + "// The copyright notices in the Software and this entire statement, including\n" + "// the above license grant, this restriction and the following disclaimer,\n" + "// must be included in all copies of the Software, in whole or in part, and\n" + "// all derivative works of the Software, unless such copies or derivative\n" + "// works are solely in the form of machine-executable object code generated by\n" + "// a source language processor.\n" + "// \n" + "// THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" + "// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" + "// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT\n" + "// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE\n" + "// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,\n" + "// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n" + "// DEALINGS IN THE SOFTWARE.\n" + "//\n" + "// This file is generated automatically. Do not edit it.\n" + "//\n\n\n"; + + _os << "#include \n"; + _os << "#include \n\n\n"; + + buildNamesTable (_os, _class_name); + buildMap (_os, _class_name); + buildReverseMap (_os, _class_name); + + _os << "Poco::" << _class_name << "::" << _class_name << "()\n{}\n\n\n"; + _os << "Poco::" << _class_name << "::~" << _class_name << "()\n{}\n\n\n"; + + _os << "const char* Poco::" << _class_name << "::canonicalName() const\n" + "{\n" + "\treturn _names[0];\n" + "}\n\n\n"; + + _os << "bool Poco::" << _class_name << "::isA(const std::string& encodingName) const\n" + "{\n" + "\tfor (const char** name = _names; *name; ++name)\n" + "\t{\n" + "\t\tif (Poco::icompare (encodingName, *name) == 0)\n" + "\t\t\treturn true;\n" + "\t}\n\n" + "\treturn false;\n" + "}\n\n\n"; + + _os << "const Poco::TextEncoding::CharacterMap& Poco::" << _class_name << "::characterMap() const\n" + "{\n" + "\treturn _map;\n" + "}\n\n\n"; + + _os << "int Poco::" << _class_name << "::convert(const unsigned char* bytes) const\n" + "{\n" + "\treturn _map[*bytes];\n" + "}\n\n\n"; + + _os << "int Poco::" << _class_name << "::convert(int ch, unsigned char* bytes, int length) const\n" + "{\n"; + + _os << "\tif (ch == 0x0000)\n" + "\t{\n" + "\t\tif (bytes && (length >= 1))\n" + "\t\t\t*bytes = 0x00;\n" + "\t\treturn 1;\n" + "\t}\n\n" + "\telse"; + for (int i = 0; i < 256; ++i) + { + int j = 0; + for (; j < 256; ++j) + { + if (m_reverse_charset[i][j] != 0) + break; + } + + // Empty line + if (j == 256) + continue; + + _os << " if ((ch & 0xff00) == 0x" << std::hex << std::setw (4) << std::setfill ('0') << (i << 8) << ")\n" + "\t{\n" + "\t\tif ((unsigned char)_0x" << std::hex << std::setw (4) << std::setfill ('0') << (i << 8) << "_map[ch & 0x00ff] > 0)\n" + "\t\t{\n" + "\t\t\tif (bytes && (length >= 1))\n" + "\t\t\t\t*bytes = (unsigned char)_0x" << std::hex << std::setw (4) << std::setfill ('0') << (i << 8) << "_map[ch & 0x00ff];\n" + "\t\t\treturn 1;\n" + "\t\t}\n" + "\t\treturn 0;\n" + "\t}\n\n" + "\telse"; + } + _os << "\n\t\treturn 0;\n" + "}\n"; +} + +int +main (int _argc, char* _argv[]) +{ + Encoding e; + + e.read (std::cin); + + std::string class_name; + std::string cpp_name; + std::string h_name; + if (_argc > 1) + { + class_name = std::string(_argv[1]); + if (_argc > 2) + { + cpp_name = std::string(_argv[2]); + if (_argc > 3) + h_name = std::string(_argv[3]); + else + h_name = cpp_name.substr(0, cpp_name.size() - 3) + "h"; + } + else + { + cpp_name = class_name + ".cpp"; + h_name = class_name + ".h"; + } + } + else + { + class_name = "UNDEFINED"; + cpp_name = "UNDEFINED.cpp"; + h_name = "UNDEFINED.h"; + } + + std::ofstream head (h_name.c_str()); + std::ofstream body (cpp_name.c_str()); + + e.buildHead (head, class_name); + e.buildBody (body, class_name); + + return 0; +}