|
| 1 | +/* |
| 2 | + * Copyright (C) 2023 The C++ Plus Project. |
| 3 | + * This file is part of the cppp-reiconv Library. |
| 4 | + * |
| 5 | + * The cppp-reiconv Library is free software; you can redistribute it |
| 6 | + * and/or modify it under the terms of the GNU Lesser General Public |
| 7 | + * License as published by the Free Software Foundation; either version 3 |
| 8 | + * of the License, or (at your option) any later version. |
| 9 | + * |
| 10 | + * The cppp-reiconv Library is distributed in the hope that it will be |
| 11 | + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | + * Lesser General Public License for more details. |
| 14 | + * |
| 15 | + * You should have received a copy of the GNU Lesser General Public |
| 16 | + * License along with the cppp-reiconv Library; see the file COPYING. |
| 17 | + * If not, see <https://www.gnu.org/licenses/>. |
| 18 | + */ |
| 19 | + |
| 20 | +/* |
| 21 | + Complete check of a stateless encoding. |
| 22 | +*/ |
| 23 | + |
| 24 | +#include <iostream> |
| 25 | +#include <cstring> |
| 26 | +#include <regex> |
| 27 | + |
| 28 | +#include "file_utils.hpp" |
| 29 | +#include "table-from.hpp" |
| 30 | +#include "table-to.hpp" |
| 31 | +#include "sort.hpp" |
| 32 | +#include "uniq-u.hpp" |
| 33 | + |
// Replace every non-overlapping occurrence of `from` in `src` with `to`.
//
// The scan resumes just past each inserted `to`, so text introduced by a
// replacement is never matched again.
//
// Parameters:
//   src  - the input string; it is not modified.
//   from - the substring to look for. If empty, `src` is returned unchanged.
//   to   - the text substituted for each occurrence of `from`.
//
// Returns a copy of `src` with the substitutions applied.
std::string replace(const std::string& src, const std::string& from, const std::string& to)
{
    std::string result = src;

    // An empty pattern matches at every position, so the loop below would
    // never terminate; treat it as "nothing to replace".
    if (from.empty())
    {
        return result;
    }

    std::string::size_type pos = 0;
    while ((pos = result.find(from, pos)) != std::string::npos)
    {
        result.replace(pos, from.length(), to);
        // Skip over the inserted text so it is not rescanned.
        pos += to.length();
    }

    return result;
}
| 47 | + |
// Command-line arguments, assigned in main() from argv[1] and argv[2].
std::string srcdir;
std::string charset;
| 49 | + |
| 50 | +void check2_pre_process(const std::string& input_file_path, const std::string& output_file_path) |
| 51 | +{ |
| 52 | + std::ifstream input_file(input_file_path); |
| 53 | + std::ofstream output_file(output_file_path, std::ios::trunc); |
| 54 | + |
| 55 | + if (input_file.is_open() && output_file.is_open()) |
| 56 | + { |
| 57 | + std::string line; |
| 58 | + std::regex pattern("\t.* 0x"); |
| 59 | + |
| 60 | + while (std::getline(input_file, line)) |
| 61 | + { |
| 62 | + if (!std::regex_search(line, pattern)) |
| 63 | + { |
| 64 | + output_file << line << '\n'; |
| 65 | + } |
| 66 | + } |
| 67 | + |
| 68 | + input_file.close(); |
| 69 | + output_file.close(); |
| 70 | + } |
| 71 | + else |
| 72 | + { |
| 73 | + error(input_file_path + " " + output_file_path, "Unable to open file."); |
| 74 | + } |
| 75 | +} |
| 76 | + |
| 77 | +// Usage: check-stateful SRCDIR CHARSET |
| 78 | +int main(int argc, char* argv[]) |
| 79 | +{ |
| 80 | + if(argc < 3) |
| 81 | + { |
| 82 | + std::cerr << "Usage: check-stateful SRCDIR CHARSET\n"; |
| 83 | + return 1; |
| 84 | + } |
| 85 | + srcdir = argv[1]; |
| 86 | + charset = argv[2]; |
| 87 | + |
| 88 | + // Charset, modified for use in filenames. |
| 89 | + std::string charsetf = replace(charset, ":", "-"); |
| 90 | + |
| 91 | + // Iconv in one direction. |
| 92 | + test::table_from("tmp-" + charsetf + ".TXT", charset); |
| 93 | + |
| 94 | + // Iconv in the other direction. |
| 95 | + test::table_to("tmp-" + charsetf + ".INVERSE.UNSORTED.TXT", charset); |
| 96 | + sort_file("tmp-" + charsetf + ".INVERSE.UNSORTED.TXT", "tmp-" + charsetf + ".INVERSE.TXT"); |
| 97 | + |
| 98 | + // Check 1: charmap and iconv forward should be identical. |
| 99 | + assert_compare_file(srcdir + "/" + charsetf + ".TXT", "tmp-" + charsetf + ".TXT"); |
| 100 | + |
| 101 | + // Check 2: the difference between the charmap and iconv backward. |
| 102 | + check2_pre_process(srcdir + "/" + charsetf + ".TXT", "tmp-noprecomposed-" + charsetf + ".TXT"); |
| 103 | + |
| 104 | + if(file_exists(srcdir + "/" + charsetf + ".IRREVERSIBLE.TXT")) |
| 105 | + { |
| 106 | + std::vector<std::string> files = { "tmp-noprecomposed-" + charsetf + ".TXT", srcdir + "/" + charsetf + ".IRREVERSIBLE.TXT" }; |
| 107 | + merge_files(files, "tmp-orig-" + charsetf + ".INVERSE.UNSORTED.TXT"); |
| 108 | + sort_file("tmp-orig-" + charsetf + ".INVERSE.UNSORTED.TXT", "tmp-orig-" + charsetf + ".INVERSE.UNUNIQUED.TXT"); |
| 109 | + uniq_u("tmp-orig-" + charsetf + ".INVERSE.UNUNIQUED.TXT", "tmp-orig-" + charsetf + ".INVERSE.TXT"); |
| 110 | + } |
| 111 | + else |
| 112 | + { |
| 113 | + copy_file("tmp-noprecomposed-" + charsetf + ".TXT", "tmp-orig-" + charsetf + ".INVERSE.TXT"); |
| 114 | + } |
| 115 | + assert_compare_file("tmp-orig-" + charsetf + ".INVERSE.TXT", "tmp-" + charsetf + ".INVERSE.TXT"); |
| 116 | + |
| 117 | + remove_file("tmp-" + charsetf + ".TXT"); |
| 118 | + remove_file("tmp-" + charsetf + ".INVERSE.UNSORTED.TXT"); |
| 119 | + remove_file("tmp-" + charsetf + ".INVERSE.TXT"); |
| 120 | + remove_file("tmp-orig-" + charsetf + ".INVERSE.UNSORTED.TXT"); |
| 121 | + remove_file("tmp-orig-" + charsetf + ".INVERSE.UNUNIQUED.TXT"); |
| 122 | + remove_file("tmp-orig-" + charsetf + ".INVERSE.TXT"); |
| 123 | + remove_file("tmp-noprecomposed-" + charsetf + ".TXT"); |
| 124 | + |
| 125 | + success("check-stateless", charset + " OK."); |
| 126 | + return 0; |
| 127 | +} |
0 commit comments