Améliorez les performances médiocres de votre esprit avec Alternative Parser

J’ai déjà posé une question à ce sujet. Mais comme il n’y avait pas de réponse, je vous le demande à nouveau avec un extrait de code source compilable.

Cet extrait de code doit être compilé sans option std = c ++ 11, en raison de problèmes liés à la sémantique de boost :: variant move. Juste ‘g ++ -Wall -pedantic’.

Dans cet extrait de code, vous trouverez la ligne “// Comment here”. Vous pouvez commenter le bloc suivant jusqu’à “// And here —–“. Si ce bloc n’est pas commenté, les performances de ce programme sont très médiocres.

Donc, le goulot d’étranglement, aussi longtemps que je peux le voir, est un parsingur syntaxique alternatif. Ce dont j’ai besoin, ce sont quelques suggestions pour améliorer / changer ma grammaire afin d’améliorer les performances d’parsing. Merci.

Code:

#include  #include  #include  #include  #include  #include  #include  #include  #include  #include  #include  #include  #include  #include  #include  #include  #include  #include  #include  namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; namespace phoenix = boost::phoenix; namespace client { typedef std::ssortingng::iterator input_iterator_type; struct op_and {}; struct op_or {}; struct op_eq {}; struct op_neq {}; struct is_part_of {}; struct more {}; struct more_eq {}; struct less {}; struct less_eq {}; struct mask_match {}; struct mask_not_match {}; struct in {}; namespace type { enum code_t { ssortingng = 0, boolean = 1, number = 2, none = 3, datetime = 4, unknown = 5 }; } template  struct binop; struct fn_call; struct none_type {~none_type(){}}; struct datetime { datetime(int yyyy, int mm, int dd, int hh24, int mi, int ss, int mls) : yy(yyyy), mm(mm), dd(dd), hh(hh24), mi(mi), ss(ss), ms(mls) {} datetime() : yy(0), mm(0), dd(0), hh(0), mi(0), ss(0), ms(0) {} int yy; int mm; int dd; int hh; int mi; int ss; int ms; }; typedef boost::variant< boost::recursive_wrapper<binop >, boost::recursive_wrapper<binop >, boost::recursive_wrapper<binop >, boost::recursive_wrapper<binop >, boost::recursive_wrapper<binop >, boost::recursive_wrapper<binop >, boost::recursive_wrapper<binop >, boost::recursive_wrapper<binop >, boost::recursive_wrapper<binop >, boost::recursive_wrapper<binop >, boost::recursive_wrapper<binop >, boost::recursive_wrapper<binop > > generic_binop; typedef boost::variant < std::string, double, none_type, datetime, boost::recursive_wrapper, boost::recursive_wrapper > node_value_t; struct g_node { mutable type::code_t type_id; mutable node_value_t value; }; typedef node_value_t value_type; template  struct binop { explicit binop(const g_node& l, const g_node& r) : oper1(l), oper2(r) {} g_node oper1, oper2; }; typedef std::vector node_vector; struct fn_call { explicit fn_call(){} std::ssortingng name; node_vector params; }; } BOOST_FUSION_ADAPT_STRUCT( client::g_node, (client::type::code_t, type_id) (client::node_value_t, value) ) BOOST_FUSION_ADAPT_STRUCT( client::fn_call, (std::ssortingng, name) (std::vector, params) ) namespace client { template  struct g_error_handler { template struct result { typedef void type; }; void operator()(Iterator first, Iterator last, Iterator err_pos, boost::spirit::info const& what) const { std::cout << "Syntax error. Expected: " << what << " at: " << std::distance(first, err_pos) << std::endl; } }; template<typename Iterator, typename ErrorHandler = g_error_handler > struct g_parser : qi::grammar { g_parser() : g_parser::base_type(or_, "G"), error_handler(ErrorHandler()) { using phoenix::at_c; or_ = (and_ >> "||" >> or_)[ at_c(qi::_val) = type::unknown, at_c(qi::_val) = phoenix::construct<binop >(qi::_1, qi::_2)] | and_[qi::_val = qi::_1]; and_ = (bool_op >> "&&" >> and_)[ at_c(qi::_val) = type::unknown, at_c(qi::_val) = phoenix::construct<binop >(qi::_1, qi::_2)] | bool_op[qi::_val = qi::_1]; bool_op = (prim >> "=" >> bool_op)[ at_c(qi::_val) = type::unknown, at_c(qi::_val) = phoenix::construct<binop >(qi::_1, qi::_2)] | (prim >> "" >> bool_op)[ at_c(qi::_val) = type::unknown, at_c(qi::_val) = phoenix::construct<binop >(qi::_1, qi::_2)] | // Comment here --------------------------------------------------- (prim >> ":" >> bool_op)[ at_c(qi::_val) = type::unknown, at_c(qi::_val) = phoenix::construct<binop >(qi::_1, qi::_2)] | (prim >> ">" >> bool_op)[ at_c(qi::_val) = type::unknown, at_c(qi::_val) = phoenix::construct<binop >(qi::_1, qi::_2)] | (prim >> ">=" >> bool_op)[ at_c(qi::_val) = type::unknown, at_c(qi::_val) = phoenix::construct<binop >(qi::_1, qi::_2)] | (prim >> "> bool_op)[ at_c(qi::_val) = type::unknown, at_c(qi::_val) = phoenix::construct<binop >(qi::_1, qi::_2)] | (prim >> "> bool_op)[ at_c(qi::_val) = type::unknown, at_c(qi::_val) = phoenix::construct<binop >(qi::_1, qi::_2)] | (prim >> "=~" >> bool_op)[ at_c(qi::_val) = type::unknown, at_c(qi::_val) = phoenix::construct<binop >(qi::_1, qi::_2)] | (prim >> "!~" >> bool_op)[ at_c(qi::_val) = type::unknown, at_c(qi::_val) = phoenix::construct<binop >(qi::_1, qi::_2)] | (prim >> "in" >> bool_op)[ at_c(qi::_val) = type::unknown, at_c(qi::_val) = phoenix::construct<binop >(qi::_1, qi::_2)] | // And here------------------------------------------------------------ prim[qi::_val = qi::_1]; prim = ssortingng_val [qi::_val = qi::_1] | number [qi::_val = qi::_1] | func_call [at_c(qi::_val) = type::unknown, at_c(qi::_val) = qi::_1] | '(' > or_ [qi::_val = qi::_1] > ')'; quoted_ssortingng %= "'" > qi::lexeme[*(qi::char_ - "'")] > "'"; func_call = fn_name > '(' > -(or_ % ',') > ')'; fn_name %= +(qi::alpha) > -(qi::char_('-')) > *(qi::alnum); ssortingng_val = quoted_ssortingng[ at_c(qi::_val) = type::ssortingng, at_c(qi::_val) = qi::_1]; number %= qi::attr(type::number) >> qi::double_; or_.name ("OR expression" ); and_.name ("AND expression" ); bool_op.name ("BOOL expression"); prim.name ("Atom expression"); quoted_ssortingng.name ("quoted ssortingng" ); fn_name.name ("function name" ); number.name ("number" ); qi::on_error(or_, error_handler(qi::_1, qi::_2, qi::_3, qi::_4)); } qi::rule and_, bool_op, prim, or_, ssortingng_val, number; qi::rule func_call; qi::rule quoted_ssortingng, fn_name; boost::phoenix::function error_handler; }; typedef g_parser grammar; } int main() { std::ssortingng s = "((foo(bar('','')) = foo('')) || ('a' = 'gg')) && (3  7) && (foo('','') = bar('','')) && 2=4 && 'a' ='b' && foo('')  foo('')"; client::grammar g; client::g_node ast; std::ssortingng::iterator begin = s.begin(); std::ssortingng::iterator end = s.end(); bool success = qi::phrase_parse(begin, end, g, boost::spirit::ascii::space, ast) && begin == end; std::ssortingngstream ss; if(!success) std::cout << "Syntax error at: " << std::distance(s.begin(), begin); else std::cout << "Syntax is Ok\n"; }

Vous pouvez refactoriser la grammaire pour induire moins de retour en arrière. J’ai déjà fait un exemple d’une telle refactorisation sur SO.

~~Lien: non trouvé~~

Cependant, voici l’essentiel. Les éléments suivants devraient être équivalents, à l’exception du besoin très réduit de revenir en arrière:

Voir en direct sur Coliru

 using boost::phoenix::construct; using qi::_val; using qi::_1; bool_op = prim [ _val = _1 ] >> -(( ("=" >> bool_op [ at_c<1>(_val) = construct > (_val, _1) ]) | ("<>" >> bool_op [ at_c<1>(_val) = construct > (_val, _1) ]) | (":" >> bool_op [ at_c<1>(_val) = construct > (_val, _1) ]) | (">" >> bool_op [ at_c<1>(_val) = construct > (_val, _1) ]) | (">=" >> bool_op [ at_c<1>(_val) = construct > (_val, _1) ]) | ("<" >> bool_op [ at_c<1>(_val) = construct > (_val, _1) ]) | ("<=" >> bool_op [ at_c<1>(_val) = construct > (_val, _1) ]) | ("=~" >> bool_op [ at_c<1>(_val) = construct > (_val, _1) ]) | ("!~" >> bool_op [ at_c<1>(_val) = construct > (_val, _1) ]) | ("in" >> bool_op [ at_c<1>(_val) = construct > (_val, _1) ]) ) >> qi::eps [ at_c<0>(_val) = type::unknown ]) ;

Notez également ma tendance à éviter le code répété et à présenter le code . Même si vous avez une norme de codage qui impose une longueur de ligne maximale, elle est clairement plus lisible, plus facile à gérer et beaucoup moins sujette aux erreurs, pour mettre en forme le code dans des colonnes alignées, comme je l’ai fait. En fait, vous pouvez simplement dire que ce morceau de code est une “méta-donnée”, une table si vous voulez, et vous pouvez faire une exception bien raisonnée.

Je pense que je vois un certain nombre d’autres “odeurs de code” – ou, plus positivement, de “possibilités de simplification”.

En fait, j’ai refactorisé de nombreuses grammaires comme SO dans le passé et ai généralement réduit le code à moins de 50% du code d’origine, en ajoutant souvent des fonctionnalités en même temps. Je n’ai malheureusement pas le temps de les trouver pour le moment, mais vous pouvez peut-être jeter un coup d’œil par vous-même.