Comment puis-je simplement consommer des caractères non reconnus ?

J’ai réussi à analyser un fichier PGN grâce à la bibliothèque Boost Spirit, mais l’analyse échoue dès qu’il y a des caractères que je n’ai pas « anticipés ».

Voici ma grammaire Spirit :

#include  #include  #include  BOOST_FUSION_ADAPT_STRUCT( loloof64::pgn_tag, (std::ssortingng, key), (std::ssortingng, value) ) BOOST_FUSION_ADAPT_STRUCT( loloof64::game_move, (unsigned, move_number), (std::ssortingng, move_turn), (std::ssortingng, white_move), (std::ssortingng, black_move), (std::ssortingng, result) ) BOOST_FUSION_ADAPT_STRUCT( loloof64::pgn_game, (std::vector, header), (std::vector, moves) ) namespace loloof64 { namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; namespace phoenix = boost::phoenix; template  struct pgn_parser : qi::grammar<Iterator, std::vector, qi::unused_type> { pgn_parser() : pgn_parser::base_type(games) { using qi::lexeme; using ascii::char_; using qi::uint_; using qi::alnum; using qi::space; using qi::omit; using qi::eol; using qi::lit; quoted_ssortingng %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')]; tag %= '[' >> +alnum >> omit[+space] >> quoted_ssortingng >> ']' >> omit[+eol] ; header %= +tag; move_turn %= qi::ssortingng("...") | qi::ssortingng("."); regular_move %= +char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -qi::ssortingng("ep") ; castle_move %= qi::ssortingng("OOO") | qi::ssortingng("OO"); single_move %= (regular_move | castle_move) >> -(char_('+') | char_('#')) ; result %= qi::ssortingng("1-0") | qi::ssortingng("0-1") | qi::ssortingng("1/2-1/2") | qi::ssortingng("*"); full_move %= uint_ >> move_turn >> omit[*space] >> single_move >> -(omit[+space] >> single_move) >> -(omit[+space] >> result) ; game_description %= full_move >> *(omit[*space] >> full_move); single_game %= -header >> game_description ; games %= single_game >> *(omit[*(space|eol)] >> single_game) ; } qi::rule tag; qi::rule<Iterator, std::vector, qi::unused_type> header; qi::rule quoted_ssortingng; qi::rule result; qi::rule regular_move; qi::rule castle_move; qi::rule single_move; qi::rule move_turn; qi::rule full_move; qi::rule<Iterator, std::vector, qi::unused_type> game_description; qi::rule single_game; qi::rule<Iterator, 
std::vector, qi::unused_type> games; }; } 

Comment pourrais-je simplement consommer un caractère que je n’ai pas pu « anticiper » ? Autrement dit, comment ignorer un caractère qui n’est attendu par aucune de mes règles de grammaire ?

En ce qui concerne les tests:

Voici l’en-tête de mon analyseur (pgn_games_extractor.hpp) :

 #ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP

#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>

namespace loloof64 {

    /// One header tag of a PGN game, i.e. a [key "value"] pair.
    struct pgn_tag {
        std::string key;
        std::string value;
    };

    /// One numbered move entry of a game, every part stored as text.
    struct game_move {
        unsigned move_number;
        std::string move_turn;
        std::string white_move;
        std::string black_move;
        std::string result;
    };

    /// A complete game: its header tags followed by its moves.
    struct pgn_game {
        std::vector<pgn_tag> header;
        std::vector<game_move> moves;
    };

    /// Parses PGN input on construction and keeps the resulting games.
    class PgnGamesExtractor
    {
        public:
            PgnGamesExtractor(std::string inputFilePath);
            PgnGamesExtractor(std::ifstream &inputFile);
            /*
            Both constructors may throw PgnParsingException (if bad pgn format)
            and InputFileException (if missing file)
            */

            /// Returns a copy of the parsed games.
            std::vector<pgn_game> getGames() const { return games; }
            virtual ~PgnGamesExtractor();

        protected:

        private:
            std::vector<pgn_game> games;
            void parseInput(std::ifstream &inputFile);
    };

    /// Signals input that does not match the PGN grammar.
    class PgnParsingException : public std::runtime_error
    {
        public:
            PgnParsingException(std::string message) : std::runtime_error(message) {}
    };

    /// Signals an input file that could not be opened or read.
    class InputFileException : public std::runtime_error
    {
        public:
            InputFileException(std::string message) : std::runtime_error(message) {}
    };
}

#endif // PGNGAMESEXTRACTOR_HPP

Voici le code source de mon analyseur (pgn_games_extractor.cpp) :

 #include "pgn_games_extractor.hpp"

// NOTE(review): the three header names below and most template argument
// lists in this listing were lost; they are reconstructed from how the names
// are used — confirm against the real project.
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

// Adapt the PGN structs as Fusion sequences so that Spirit can fill them
// member by member, in declaration order.
BOOST_FUSION_ADAPT_STRUCT(
    loloof64::pgn_tag,
    (std::string, key),
    (std::string, value)
)

BOOST_FUSION_ADAPT_STRUCT(
    loloof64::game_move,
    (unsigned, move_number),
    (std::string, move_turn),
    (std::string, white_move),
    (std::string, black_move),
    (std::string, result)
)

BOOST_FUSION_ADAPT_STRUCT(
    loloof64::pgn_game,
    (std::vector<loloof64::pgn_tag>, header),
    (std::vector<loloof64::game_move>, moves)
)

namespace loloof64 {
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;

/// Qi grammar that parses a sequence of PGN games into std::vector<pgn_game>.
///
/// Whitespace is consumed explicitly inside the rules (omit[*space] and
/// friends); the grammar itself is declared with qi::unused_type in the
/// skipper position.
template <typename Iterator>
struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>(), qi::unused_type>
{
    pgn_parser() : pgn_parser::base_type(games)
    {
        using qi::lexeme;
        // NOTE(review): ascii::char_ only accepts 7-bit characters, so input
        // containing bytes >= 0x80 is rejected; qi::char_ lifts that limit.
        using ascii::char_;
        using qi::uint_;
        using qi::alnum;
        using qi::space;
        using qi::omit;
        using qi::eol;
        using qi::lit;

        // "..." : everything between two double quotes, quotes dropped.
        quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];

        // [Key "Value"] followed by at least one end-of-line.
        tag %=
            '['
            >> +alnum
            >> omit[+space]
            >> quoted_string
            >> ']'
            >> omit[+eol]
            ;

        header %= +tag;

        // "..." marks a move that starts with black, "." one starting with white.
        move_turn %= qi::string("...") | qi::string(".");

        regular_move %=
            +char_("a-hNBRQK")
            >> +char_("a-h1-8x=NBRQK")
            >> -qi::string("ep")
            ;

        castle_move %= qi::string("OOO") | qi::string("OO");

        // A move optionally followed by a check ('+') or mate ('#') marker.
        single_move %=
            (regular_move | castle_move) >> -(char_('+') | char_('#'))
            ;

        result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");

        full_move %=
            uint_
            >> move_turn
            >> omit[*space]
            >> single_move
            >> -(omit[+space] >> single_move)
            >> -(omit[+space] >> result)
            ;

        game_description %= full_move >> *(omit[*space] >> full_move);

        single_game %=
            -header
            >> game_description
            ;

        games %= single_game >> *(omit[*(space | eol)] >> single_game);
    }

    qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
    qi::rule<Iterator, std::vector<pgn_tag>(), qi::unused_type> header;
    qi::rule<Iterator, std::string()> quoted_string;
    qi::rule<Iterator, std::string()> result;
    qi::rule<Iterator, std::string()> regular_move;
    qi::rule<Iterator, std::string()> castle_move;
    qi::rule<Iterator, std::string()> single_move;
    qi::rule<Iterator, std::string()> move_turn;
    qi::rule<Iterator, game_move(), qi::unused_type> full_move;
    qi::rule<Iterator, std::vector<game_move>(), qi::unused_type> game_description;
    qi::rule<Iterator, pgn_game(), qi::unused_type> single_game;
    qi::rule<Iterator, std::vector<pgn_game>(), qi::unused_type> games;
};
}

/// Opens the file at inputFilePath and parses it.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream inputFile(inputFilePath);
    parseInput(inputFile);
}

/// Parses an already opened file stream.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::ifstream &inputFile)
{
    parseInput(inputFile);
}

loloof64::PgnGamesExtractor::~PgnGamesExtractor()
{
    // dtor
}

/// Reads the whole stream into memory, parses it and stores the games.
///
/// @throws InputFileException  if the stream is in a failed state before or
///                             after reading.
/// @throws PgnParsingException if parsing fails or stops before the end of
///                             the input; the message contains the unparsed
///                             remainder.
void loloof64::PgnGamesExtractor::parseInput(std::ifstream &inputFile)
{
    using namespace std;

    if (! inputFile)
        throw InputFileException("File does not exist !");

    // Slurp the stream: with '\0' as delimiter getline reads up to the first
    // NUL byte or to end of file, whichever comes first.
    string content("");
    getline(inputFile, content, '\0');

    // Throw by value: a thrown pointer would leak and would not be caught by
    // handlers written for InputFileException.
    if (inputFile.fail() || inputFile.bad())
        throw InputFileException("Could not read the input file !");

    loloof64::pgn_parser<string::const_iterator> parser;
    std::vector<loloof64::pgn_game> temp_games;

    string::const_iterator iter = content.begin();
    string::const_iterator end = content.end();

    bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::eol, temp_games);

    // Success requires the parser to have consumed the whole input.
    if (success && iter == end)
    {
        games = temp_games;
    }
    else
    {
        string error_fragment(iter, end);
        string error_message("");

        error_message = "Failed to parse the input at :'" + error_fragment + "' !";

        throw PgnParsingException(error_message);
    }
}

Je pose cette question parce que je n’ai pas réussi à analyser le fichier PGN suivant : ScotchGambitPgn.zip. Je pense que c’est à cause d’un problème d’encodage dans ce fichier.

J’utilise Spirit 2 et C++11 (GNU).

Pour ce que cela vaut, voici une version beaucoup simplifiée :

Live On Coliru

 //#define BOOST_SPIRIT_DEBUG #ifndef PGNGAMESEXTRACTOR_HPP #define PGNGAMESEXTRACTOR_HPP #include  #include  #include  #include  namespace loloof64 { struct pgn_tag { std::ssortingng key; std::ssortingng value; }; struct game_move { unsigned move_number; std::ssortingng white_move; std::ssortingng black_move; enum result_t { white_won, black_won, draw, undecided } result; }; struct pgn_game { std::vector header; std::vector moves; }; class PgnGamesExtractor { public: PgnGamesExtractor(std::ssortingng inputFilePath); PgnGamesExtractor(std::istream &inputFile); /* Both constructos may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file) */ std::vector getGames() const { return games; } virtual ~PgnGamesExtractor(); protected: private: std::vector games; void parseInput(std::istream &inputFile); }; class PgnParsingException : public virtual std::runtime_error { public: PgnParsingException(std::ssortingng message) : std::runtime_error(message) {} }; class InputFileException : public virtual std::runtime_error { public: InputFileException(std::ssortingng message) : std::runtime_error(message) {} }; } #endif // PGNGAMESEXTRACTOR_HPP #include  #include  BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value) BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result) BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves) namespace loloof64 { namespace qi = boost::spirit::qi; template  struct pgn_parser : qi::grammar, qi::space_type> { pgn_parser() : pgn_parser::base_type(games) { using namespace qi; const std::ssortingng no_move; result.add ("1-0", game_move::white_won) ("0-1", game_move::black_won) ("1/2-1/2", game_move::draw) ("*", game_move::undecided); quoted_ssortingng = '"' >> *~char_('"') >> '"'; tag = '[' >> +alnum >> quoted_ssortingng >> ']'; header = +tag; regular_move = lit("OOO") | "OO" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("ep")); single_move = raw [ regular_move >> 
-char_("+#") ]; full_move = uint_ >> (lexeme["..." >> attr(no_move)] | "." >> single_move) >> (single_move | attr(no_move)) >> -result; game_description = +full_move; single_game = -header >> game_description; games = *single_game; BOOST_SPIRIT_DEBUG_NODES( (tag)(header)(quoted_ssortingng)(regular_move)(single_move) (full_move)(game_description)(single_game)(games) ) } private: qi::rule tag; qi::rule, qi::space_type> header; qi::rule full_move; qi::rule, qi::space_type> game_description; qi::rule single_game; qi::rule, qi::space_type> games; // lexemes qi::symbols result; qi::rule quoted_ssortingng; qi::rule regular_move; qi::rule single_move; }; } loloof64::PgnGamesExtractor::PgnGamesExtractor(std::ssortingng inputFilePath) { std::ifstream inputFile(inputFilePath); parseInput(inputFile); } loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile) { parseInput(inputFile); } loloof64::PgnGamesExtractor::~PgnGamesExtractor() { // dtor } void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) { if (inputFile.fail() || inputFile.bad()) throw new InputFileException("Could not read the input file !"); typedef boost::spirit::istream_iterator It; loloof64::pgn_parser parser; std::vector temp_games; It iter(inputFile >> std::noskipws), end; bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games); if (success && iter == end) { games.swap(temp_games); } else { std::ssortingng error_fragment(iter, end); throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !"); } } int main() { loloof64::PgnGamesExtractor pge(std::cin); // "ScotchGambit.pgn" std::cout << "Parsed " << pge.getGames().size() << " games\n"; for (auto& g : pge.getGames()) for (auto& m : g.moves) std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n"; } 

Remarques:

  • ne pas lire le fichier complet en mémoire (utiliser boost::spirit::istream_iterator)
  • ne pas sauter les espaces manuellement (utiliser des skippers)
  • ne pas utiliser lexeme[] explicitement (voir les questions sur les skippers de Boost Spirit)
  • ne pas utiliser %= si ce n’est pas nécessaire
  • ne pas synthétiser d’attributs inutiles (utiliser raw[])
  • traiter les parties optionnelles d’un coup comme optionnelles ; ne pas stocker de drapeaux magiques asymétriques tels que "..."
  • ne pas être trop spécifique (utiliser istream& au lieu de ifstream&)

J’ai probablement oublié d’autres choses. La sortie est par exemple :

 Parsed 6166 games 1. e4 e5 2. Nf3 Nc6 3. d4 exd4 4. Bc4 Qf6 5. OO d6 6. Ng5 Nh6 7. f4 Be7 8. e5 Qg6 9. exd6 cxd6 10. c3 dxc3 11. Nxc3 OO 12. Nd5 Bd7 13. Rf3 Bg4 14. Bd3 Bxf3 15. Qxf3 f5 16. Bc4 Kh8 17. Nxe7 Nxe7 18. Qxb7 Qf6 19. Be3 Rfb8 20. Qd7 Rd8 21. Qb7 d5 22. Bb3 Nc6 23. Bxd5 Nd4 24. Rd1 Ne2+ 25. Kf1 Rab8 26. Qxa7 Rxb2 27. Ne6 Qxe6 28. Bxe6 Rxd1+ 29. Kf2 1. e4 e5 2. Nf3 Nc6 3. d4 exd4 4. Bc4 Bc5 5. Ng5 Ne5 6. Bxf7+ Nxf7 7. Nxf7 Bb4+ 8. c3 dxc3 9. bxc3 Bxc3+ 10. Nxc3 Kxf7 11. Qd5+ Kf8 12. Ba3+ d6 13. e5 Qg5 14. exd6 Qxd5 

Comme demandé, voici la traduction directe en X3.

  • moins de lignes de code (10 lignes)
  • temps de compilation réduit de 7,4 s à 3,6 s (clang)
  • temps de compilation réduit de 11,4 s à 6,0 s (gcc5)
  • temps d’exécution réduit de 0,80 à 0,55 (clang et gcc)

Les sorties sont identiques (exactement).

Live On Coliru

 //#define BOOST_SPIRIT_DEBUG #ifndef PGNGAMESEXTRACTOR_HPP #define PGNGAMESEXTRACTOR_HPP #include  #include  #include  #include  namespace loloof64 { struct pgn_tag { std::ssortingng key; std::ssortingng value; }; struct game_move { unsigned move_number; std::ssortingng white_move; std::ssortingng black_move; enum result_t { white_won, black_won, draw, undecided } result; }; struct pgn_game { std::vector header; std::vector moves; }; class PgnGamesExtractor { public: PgnGamesExtractor(std::ssortingng inputFilePath); PgnGamesExtractor(std::istream &inputFile); /* Both constructos may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file) */ std::vector getGames() const { return games; } virtual ~PgnGamesExtractor(); protected: private: std::vector games; void parseInput(std::istream &inputFile); }; class PgnParsingException : public virtual std::runtime_error { public: PgnParsingException(std::ssortingng message) : std::runtime_error(message) {} }; class InputFileException : public virtual std::runtime_error { public: InputFileException(std::ssortingng message) : std::runtime_error(message) {} }; } #endif // PGNGAMESEXTRACTOR_HPP #include  #include  #include  BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value) BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result) BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves) namespace loloof64 { namespace pgn_parser { using namespace boost::spirit::x3; static std::ssortingng const no_move; static auto const result = []{ symbols table; table.add ("1-0", game_move::white_won) ("0-1", game_move::black_won) ("1/2-1/2", game_move::draw) ("*", game_move::undecided); return table; }(); static auto const quoted_ssortingng = lexeme['"' >> *~char_('"') >> '"']; static auto const tag = '[' >> +alnum >> quoted_ssortingng >> ']'; static auto const header = +tag; static auto const regular_move = as_parser("OOO") | "OO" | (+char_("a-hNBRQK") >> 
+char_("a-h1-8x=NBRQK") >> -lit("ep")); static auto const single_move = rule { "single_move" } = raw [ lexeme [ regular_move >> -char_("+#")] ]; static auto const full_move = rule { "full_move" } = uint_ >> (lexeme["..." >> attr(no_move)] | "." >> single_move) >> (single_move | attr(no_move)) >> -result; static auto const game_description = +full_move; static auto const single_game = rule { "single_game" } = -header >> game_description; static auto const games = *single_game; } } loloof64::PgnGamesExtractor::PgnGamesExtractor(std::ssortingng inputFilePath) { std::ifstream inputFile(inputFilePath); parseInput(inputFile); } loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile) { parseInput(inputFile); } loloof64::PgnGamesExtractor::~PgnGamesExtractor() { // dtor } void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) { if (inputFile.fail() || inputFile.bad()) throw new InputFileException("Could not read the input file !"); typedef boost::spirit::istream_iterator It; std::vector temp_games; It iter(inputFile >> std::noskipws), end; bool success = boost::spirit::x3::phrase_parse(iter, end, pgn_parser::games, boost::spirit::x3::space, temp_games); if (success && iter == end) { games.swap(temp_games); } else { std::ssortingng error_fragment(iter, end); throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !"); } } #include  int main() { loloof64::PgnGamesExtractor pge("ScotchGambit.pgn"); std::cout << "Parsed " << pge.getGames().size() << " games\n"; for (auto& g : pge.getGames()) for (auto& m : g.moves) std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n"; } 

En effet, le problème vient de « Veronica ». Ou plutôt de « Ver?nica », où « ? » est l’unité de code <93> — ce qui, en l’absence de page de codes ou d’information d’encodage, pourrait signifier à peu près n’importe quoi.

Vous utilisez ascii::char_, qui n’accepte que des caractères 7 bits.

Cela se corrige facilement en remplaçant

 using ascii::char_; 

par

 using qi::char_;