From dca61c0261f8627814ff56cc8dcc5387de3860fd Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fran=C3=A7ois=20Goudal?= Date: Wed, 18 Aug 2021 23:01:23 +0200 Subject: [PATCH] jq: Backport a bunch of commits in order to fix jq-1.6 terrible performance on startup MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: François Goudal --- utils/jq/Makefile | 2 +- ...time-by-marking-subtrees-with-unboun.patch | 119 ++++++++++++ ...ock_drop_unreferenced-in-linear-time.patch | 85 +++++++++ ...-definition-of-block_bind_referenced.patch | 78 ++++++++ ...ror-message-when-rel_path-is-invalid.patch | 40 +++++ ...the-first-component-of-a-module-path.patch | 50 ++++++ ...ore-imports-special-case-with-assert.patch | 37 ++++ ...007-Add-import-metadata-key-optional.patch | 39 ++++ ...s-a-library-instead-of-with-builtins.patch | 99 ++++++++++ ...ding-fast-again-by-binding-only-refe.patch | 170 ++++++++++++++++++ 10 files changed, 718 insertions(+), 1 deletion(-) create mode 100644 utils/jq/patches/0001-Improve-linking-time-by-marking-subtrees-with-unboun.patch create mode 100644 utils/jq/patches/0002-Reimplement-block_drop_unreferenced-in-linear-time.patch create mode 100644 utils/jq/patches/0003-Simplify-definition-of-block_bind_referenced.patch create mode 100644 utils/jq/patches/0004-Pass-on-the-error-message-when-rel_path-is-invalid.patch create mode 100644 utils/jq/patches/0005-Catch-.-as-the-first-component-of-a-module-path.patch create mode 100644 utils/jq/patches/0006-Replace-TOP-before-imports-special-case-with-assert.patch create mode 100644 utils/jq/patches/0007-Add-import-metadata-key-optional.patch create mode 100644 utils/jq/patches/0008-Load-.jq-as-a-library-instead-of-with-builtins.patch create mode 100644 utils/jq/patches/0009-Make-builtin-binding-fast-again-by-binding-only-refe.patch diff --git a/utils/jq/Makefile b/utils/jq/Makefile index d5ffdd0916..402e9983cc 100644 --- a/utils/jq/Makefile +++ b/utils/jq/Makefile @@ -9,7 
+9,7 @@ include $(TOPDIR)/rules.mk PKG_NAME:=jq PKG_VERSION:=1.6 -PKG_RELEASE:=1 +PKG_RELEASE:=2 PKG_LICENSE:=BSD PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz diff --git a/utils/jq/patches/0001-Improve-linking-time-by-marking-subtrees-with-unboun.patch b/utils/jq/patches/0001-Improve-linking-time-by-marking-subtrees-with-unboun.patch new file mode 100644 index 0000000000..f1d704dabe --- /dev/null +++ b/utils/jq/patches/0001-Improve-linking-time-by-marking-subtrees-with-unboun.patch @@ -0,0 +1,119 @@ +From a949ffe9554b5af5614d31b795805f56939a031b Mon Sep 17 00:00:00 2001 +From: Muh Muhten +Date: Fri, 8 Feb 2019 16:52:04 -0500 +Subject: [PATCH 1/9] Improve linking time by marking subtrees with unbound + symbols + +--- + src/compile.c | 29 +++++++++++++++++++++++++---- + 1 file changed, 25 insertions(+), 4 deletions(-) + +--- a/src/compile.c ++++ b/src/compile.c +@@ -49,9 +49,10 @@ struct inst { + // Unbound instructions (references to other things that may or may not + // exist) are created by "gen_foo_unbound", and bindings are created by + // block_bind(definition, body), which binds all instructions in +- // body which are unboudn and refer to "definition" by name. ++ // body which are unbound and refer to "definition" by name. 
+ struct inst* bound_by; + char* symbol; ++ int any_unbound; + + int nformals; + int nactuals; +@@ -73,6 +74,7 @@ static inst* inst_new(opcode op) { + i->bytecode_pos = -1; + i->bound_by = 0; + i->symbol = 0; ++ i->any_unbound = 0; + i->nformals = -1; + i->nactuals = -1; + i->subfn = gen_noop(); +@@ -156,6 +158,7 @@ block gen_const_global(jv constant, cons + inst* i = inst_new(STORE_GLOBAL); + i->imm.constant = constant; + i->symbol = strdup(name); ++ i->any_unbound = 0; + return inst_block(i); + } + +@@ -211,6 +214,7 @@ block gen_op_unbound(opcode op, const ch + assert(opcode_describe(op)->flags & OP_HAS_BINDING); + inst* i = inst_new(op); + i->symbol = strdup(name); ++ i->any_unbound = 1; + return inst_block(i); + } + +@@ -224,6 +228,7 @@ block gen_op_bound(opcode op, block bind + assert(block_is_single(binder)); + block b = gen_op_unbound(op, binder.first->symbol); + b.first->bound_by = binder.first; ++ b.first->any_unbound = 0; + return b; + } + +@@ -324,7 +329,7 @@ static int block_count_refs(block binder + return nrefs; + } + +-static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) { ++static int block_bind_subblock_inner(int* any_unbound, block binder, block body, int bindflags, int break_distance) { + assert(block_is_single(binder)); + assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD)); + assert(binder.first->symbol); +@@ -336,6 +341,9 @@ static int block_bind_subblock(block bin + binder.first->nformals = block_count_formals(binder); + int nrefs = 0; + for (inst* i = body.first; i; i = i->next) { ++ if (i->any_unbound == 0) ++ continue; ++ + int flags = opcode_describe(i->op)->flags; + if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 && + (!strcmp(i->symbol, binder.first->symbol) || +@@ -357,14 +365,25 @@ static int block_bind_subblock(block bin + // a break whenever we come across a STOREV of *anonlabel... 
+ break_distance++; + } ++ ++ i->any_unbound = (i->symbol && !i->bound_by); ++ + // binding recurses into closures +- nrefs += block_bind_subblock(binder, i->subfn, bindflags, break_distance); ++ nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->subfn, bindflags, break_distance); + // binding recurses into argument list +- nrefs += block_bind_subblock(binder, i->arglist, bindflags, break_distance); ++ nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->arglist, bindflags, break_distance); ++ ++ if (i->any_unbound) ++ *any_unbound = 1; + } + return nrefs; + } + ++static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) { ++ int any_unbound; ++ return block_bind_subblock_inner(&any_unbound, binder, body, bindflags, break_distance); ++} ++ + static int block_bind_each(block binder, block body, int bindflags) { + assert(block_has_only_binders(binder, bindflags)); + bindflags |= OP_HAS_BINDING; +@@ -550,6 +569,7 @@ block gen_function(const char* name, blo + } + i->subfn = body; + i->symbol = strdup(name); ++ i->any_unbound = -1; + i->arglist = formals; + block b = inst_block(i); + block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0); +@@ -1081,6 +1101,7 @@ block gen_cbinding(const struct cfunctio + inst* i = inst_new(CLOSURE_CREATE_C); + i->imm.cfunc = &cfunctions[cfunc]; + i->symbol = strdup(i->imm.cfunc->name); ++ i->any_unbound = 0; + code = block_bind(inst_block(i), code, OP_IS_CALL_PSEUDO); + } + return code; diff --git a/utils/jq/patches/0002-Reimplement-block_drop_unreferenced-in-linear-time.patch b/utils/jq/patches/0002-Reimplement-block_drop_unreferenced-in-linear-time.patch new file mode 100644 index 0000000000..3df0d00939 --- /dev/null +++ b/utils/jq/patches/0002-Reimplement-block_drop_unreferenced-in-linear-time.patch @@ -0,0 +1,85 @@ +From aab54373e9406ee2a154b8d6166b3045aa3484ee Mon Sep 17 00:00:00 2001 +From: Muh Muhten +Date: Sat, 9 Feb 2019 17:24:18 -0500 +Subject: [PATCH 2/9] 
Reimplement block_drop_unreferenced in linear time + +--- + src/compile.c | 50 +++++++++++++++++++++++++++++--------------------- + 1 file changed, 29 insertions(+), 21 deletions(-) + +--- a/src/compile.c ++++ b/src/compile.c +@@ -53,6 +53,7 @@ struct inst { + struct inst* bound_by; + char* symbol; + int any_unbound; ++ int referenced; + + int nformals; + int nactuals; +@@ -75,6 +76,7 @@ static inst* inst_new(opcode op) { + i->bound_by = 0; + i->symbol = 0; + i->any_unbound = 0; ++ i->referenced = 0; + i->nformals = -1; + i->nactuals = -1; + i->subfn = gen_noop(); +@@ -465,30 +467,36 @@ block block_bind_referenced(block binder + return block_join(refd, body); + } + ++static void block_mark_referenced(block body) { ++ int saw_top = 0; ++ for (inst* i = body.last; i; i = i->prev) { ++ if (saw_top && i->bound_by == i && !i->referenced) ++ continue; ++ if (i->op == TOP) { ++ saw_top = 1; ++ } ++ if (i->bound_by) { ++ i->bound_by->referenced = 1; ++ } ++ ++ block_mark_referenced(i->arglist); ++ block_mark_referenced(i->subfn); ++ } ++} ++ + block block_drop_unreferenced(block body) { +- inst* curr; ++ block_mark_referenced(body); ++ + block refd = gen_noop(); +- block unrefd = gen_noop(); +- int drop; +- do { +- drop = 0; +- while ((curr = block_take(&body)) && curr->op != TOP) { +- block b = inst_block(curr); +- if (block_count_refs(b,refd) + block_count_refs(b,body) == 0) { +- unrefd = BLOCK(unrefd, b); +- drop++; +- } else { +- refd = BLOCK(refd, b); +- } +- } +- if (curr && curr->op == TOP) { +- body = BLOCK(inst_block(curr),body); ++ inst* curr; ++ while ((curr = block_take(&body))) { ++ if (curr->bound_by == curr && !curr->referenced) { ++ inst_free(curr); ++ } else { ++ refd = BLOCK(inst_block(curr), refd); + } +- body = BLOCK(refd, body); +- refd = gen_noop(); +- } while (drop != 0); +- block_free(unrefd); +- return body; ++ } ++ return refd; + } + + jv block_take_imports(block* body) { diff --git 
a/utils/jq/patches/0003-Simplify-definition-of-block_bind_referenced.patch b/utils/jq/patches/0003-Simplify-definition-of-block_bind_referenced.patch new file mode 100644 index 0000000000..d710e4bc24 --- /dev/null +++ b/utils/jq/patches/0003-Simplify-definition-of-block_bind_referenced.patch @@ -0,0 +1,78 @@ +From e6676ebbd2ab0a6283d96c797dbe93552c1a222c Mon Sep 17 00:00:00 2001 +From: Muh Muhten +Date: Mon, 18 Feb 2019 21:00:59 -0500 +Subject: [PATCH 3/9] Simplify definition of block_bind_referenced + +--- + src/compile.c | 49 ++++++++----------------------------------------- + 1 file changed, 8 insertions(+), 41 deletions(-) + +--- a/src/compile.c ++++ b/src/compile.c +@@ -317,20 +317,6 @@ static int block_count_actuals(block b) + return args; + } + +-static int block_count_refs(block binder, block body) { +- int nrefs = 0; +- for (inst* i = body.first; i; i = i->next) { +- if (i != binder.first && i->bound_by == binder.first) { +- nrefs++; +- } +- // counting recurses into closures +- nrefs += block_count_refs(binder, i->subfn); +- // counting recurses into argument list +- nrefs += block_count_refs(binder, i->arglist); +- } +- return nrefs; +-} +- + static int block_bind_subblock_inner(int* any_unbound, block binder, block body, int bindflags, int break_distance) { + assert(block_is_single(binder)); + assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD)); +@@ -434,37 +420,18 @@ block block_bind_library(block binder, b + return body; // We don't return a join because we don't want those sticking around... + } + +-// Bind binder to body and throw away any defs in binder not referenced +-// (directly or indirectly) from body. ++// Bind binder to body, then throw it away if not referenced. 
+ block block_bind_referenced(block binder, block body, int bindflags) { ++ assert(block_is_single(binder)); + assert(block_has_only_binders(binder, bindflags)); + bindflags |= OP_HAS_BINDING; +- block refd = gen_noop(); +- block unrefd = gen_noop(); +- int nrefs; +- for (int last_kept = 0, kept = 0; ; ) { +- for (inst* curr; (curr = block_take(&binder));) { +- block b = inst_block(curr); +- nrefs = block_bind_each(b, body, bindflags); +- // Check if this binder is referenced from any of the ones we +- // already know are referenced by body. +- nrefs += block_count_refs(b, refd); +- nrefs += block_count_refs(b, body); +- if (nrefs) { +- refd = BLOCK(refd, b); +- kept++; +- } else { +- unrefd = BLOCK(unrefd, b); +- } +- } +- if (kept == last_kept) +- break; +- last_kept = kept; +- binder = unrefd; +- unrefd = gen_noop(); ++ ++ if (block_bind_subblock(binder, body, bindflags, 0) == 0) { ++ block_free(binder); ++ } else { ++ body = BLOCK(binder, body); + } +- block_free(unrefd); +- return block_join(refd, body); ++ return body; + } + + static void block_mark_referenced(block body) { diff --git a/utils/jq/patches/0004-Pass-on-the-error-message-when-rel_path-is-invalid.patch b/utils/jq/patches/0004-Pass-on-the-error-message-when-rel_path-is-invalid.patch new file mode 100644 index 0000000000..39747cf4c7 --- /dev/null +++ b/utils/jq/patches/0004-Pass-on-the-error-message-when-rel_path-is-invalid.patch @@ -0,0 +1,40 @@ +From 2e3dbb884199bba6cc07345f6d394f1ac53465ac Mon Sep 17 00:00:00 2001 +From: Muh Muhten +Date: Tue, 19 Feb 2019 00:34:04 -0500 +Subject: [PATCH 4/9] Pass on the error message when rel_path is invalid + +"Module path must be a string" is not a useful error message when the +reason the module path isn't a string is because the string it was got +replaced with an invalid with an error message for some other reason. + +Also fixes a few memory leaks on early exits. 
+--- + src/linker.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +--- a/src/linker.c ++++ b/src/linker.c +@@ -138,10 +138,20 @@ static jv jv_basename(jv name) { + + // Asummes validated relative path to module + static jv find_lib(jq_state *jq, jv rel_path, jv search, const char *suffix, jv jq_origin, jv lib_origin) { +- if (jv_get_kind(search) != JV_KIND_ARRAY) +- return jv_invalid_with_msg(jv_string_fmt("Module search path must be an array")); +- if (jv_get_kind(rel_path) != JV_KIND_STRING) ++ if (!jv_is_valid(rel_path)) { ++ jv_free(search); ++ return rel_path; ++ } ++ if (jv_get_kind(rel_path) != JV_KIND_STRING) { ++ jv_free(rel_path); ++ jv_free(search); + return jv_invalid_with_msg(jv_string_fmt("Module path must be a string")); ++ } ++ if (jv_get_kind(search) != JV_KIND_ARRAY) { ++ jv_free(rel_path); ++ jv_free(search); ++ return jv_invalid_with_msg(jv_string_fmt("Module search path must be an array")); ++ } + + struct stat st; + int ret; diff --git a/utils/jq/patches/0005-Catch-.-as-the-first-component-of-a-module-path.patch b/utils/jq/patches/0005-Catch-.-as-the-first-component-of-a-module-path.patch new file mode 100644 index 0000000000..8c23bf4d69 --- /dev/null +++ b/utils/jq/patches/0005-Catch-.-as-the-first-component-of-a-module-path.patch @@ -0,0 +1,50 @@ +From d0fe86177427e0c3bc2cec1436d74472e4b618dd Mon Sep 17 00:00:00 2001 +From: Muh Muhten +Date: Tue, 19 Feb 2019 00:35:40 -0500 +Subject: [PATCH 5/9] Catch .. as the first component of a module path + +Only the second and subsequent path components were being checked, which +I guess is theoretically security-relevant. + +There's no apparent point to reconstructing the path after splitting it +by adding /s back in, either. 
+--- + src/linker.c | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +--- a/src/linker.c ++++ b/src/linker.c +@@ -98,12 +98,9 @@ static jv validate_relpath(jv name) { + return res; + } + jv components = jv_string_split(jv_copy(name), jv_string("/")); +- jv rp = jv_array_get(jv_copy(components), 0); +- components = jv_array_slice(components, 1, jv_array_length(jv_copy(components))); + jv_array_foreach(components, i, x) { + if (!strcmp(jv_string_value(x), "..")) { + jv_free(x); +- jv_free(rp); + jv_free(components); + jv res = jv_invalid_with_msg(jv_string_fmt("Relative paths to modules may not traverse to parent directories (%s)", s)); + jv_free(name); +@@ -111,18 +108,16 @@ static jv validate_relpath(jv name) { + } + if (i > 0 && jv_equal(jv_copy(x), jv_array_get(jv_copy(components), i - 1))) { + jv_free(x); +- jv_free(rp); + jv_free(components); + jv res = jv_invalid_with_msg(jv_string_fmt("module names must not have equal consecutive components: %s", + jv_string_value(name))); + jv_free(name); + return res; + } +- rp = jv_string_concat(rp, jv_string_concat(jv_string("/"), x)); ++ jv_free(x); + } + jv_free(components); +- jv_free(name); +- return rp; ++ return name; + } + + // Assumes name has been validated diff --git a/utils/jq/patches/0006-Replace-TOP-before-imports-special-case-with-assert.patch b/utils/jq/patches/0006-Replace-TOP-before-imports-special-case-with-assert.patch new file mode 100644 index 0000000000..ae2c4069dc --- /dev/null +++ b/utils/jq/patches/0006-Replace-TOP-before-imports-special-case-with-assert.patch @@ -0,0 +1,37 @@ +From a114b871e460ef2ddcf7698bc6b18651c976626a Mon Sep 17 00:00:00 2001 +From: Muh Muhten +Date: Tue, 19 Feb 2019 00:14:53 -0500 +Subject: [PATCH 6/9] Replace TOP-before-imports special case with assert + +The case isn't actually possible afaict. 
+--- + src/compile.c | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) + +--- a/src/compile.c ++++ b/src/compile.c +@@ -469,10 +469,10 @@ block block_drop_unreferenced(block body + jv block_take_imports(block* body) { + jv imports = jv_array(); + +- inst* top = NULL; +- if (body->first && body->first->op == TOP) { +- top = block_take(body); +- } ++ /* Parser should never generate TOP before imports */ ++ assert(!(body->first && body->first->op == TOP && body->first->next && ++ (body->first->next->op == MODULEMETA || body->first->next->op == DEPS))); ++ + while (body->first && (body->first->op == MODULEMETA || body->first->op == DEPS)) { + inst* dep = block_take(body); + if (dep->op == DEPS) { +@@ -480,9 +480,6 @@ jv block_take_imports(block* body) { + } + inst_free(dep); + } +- if (top) { +- *body = block_join(inst_block(top),*body); +- } + return imports; + } + diff --git a/utils/jq/patches/0007-Add-import-metadata-key-optional.patch b/utils/jq/patches/0007-Add-import-metadata-key-optional.patch new file mode 100644 index 0000000000..8b1d8052ae --- /dev/null +++ b/utils/jq/patches/0007-Add-import-metadata-key-optional.patch @@ -0,0 +1,39 @@ +From 90b92d8c73446bb50eee14ca8d88c5224002001a Mon Sep 17 00:00:00 2001 +From: Muh Muhten +Date: Sun, 15 Oct 2017 01:57:17 -0400 +Subject: [PATCH 7/9] Add import metadata key "optional" + +A library marked "optional" is imported if found, but silently skipped if missing. 
+This is the desired semantic for the auto-include at ~/.jq +--- + src/linker.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/src/linker.c ++++ b/src/linker.c +@@ -246,6 +246,9 @@ static int process_dependencies(jq_state + jv v = jv_object_get(jv_copy(dep), jv_string("raw")); + if (jv_get_kind(v) == JV_KIND_TRUE) + raw = 1; ++ int optional = 0; ++ if (jv_get_kind(jv_object_get(jv_copy(dep), jv_string("optional"))) == JV_KIND_TRUE) ++ optional = 1; + jv_free(v); + jv relpath = validate_relpath(jv_object_get(jv_copy(dep), jv_string("relpath"))); + jv as = jv_object_get(jv_copy(dep), jv_string("as")); +@@ -259,10 +262,14 @@ static int process_dependencies(jq_state + jv resolved = find_lib(jq, relpath, search, is_data ? ".json" : ".jq", jv_copy(jq_origin), jv_copy(lib_origin)); + // XXX ...move the rest of this into a callback. + if (!jv_is_valid(resolved)) { ++ jv_free(as); ++ if (optional) { ++ jv_free(resolved); ++ continue; ++ } + jv emsg = jv_invalid_get_msg(resolved); + jq_report_error(jq, jv_string_fmt("jq: error: %s\n",jv_string_value(emsg))); + jv_free(emsg); +- jv_free(as); + jv_free(deps); + jv_free(jq_origin); + jv_free(lib_origin); diff --git a/utils/jq/patches/0008-Load-.jq-as-a-library-instead-of-with-builtins.patch b/utils/jq/patches/0008-Load-.jq-as-a-library-instead-of-with-builtins.patch new file mode 100644 index 0000000000..4d936559a3 --- /dev/null +++ b/utils/jq/patches/0008-Load-.jq-as-a-library-instead-of-with-builtins.patch @@ -0,0 +1,99 @@ +From 4c5a08b9e01ebfce5c8914dd82c1722737bbecab Mon Sep 17 00:00:00 2001 +From: Muh Muhten +Date: Tue, 19 Feb 2019 00:39:34 -0500 +Subject: [PATCH 8/9] Load ~/.jq as a library instead of with builtins + +Remove the special code which loads ~/.jq in builtin.c, and instead glue +an optional include which points to the same file onto the main program +in linker.c. + +Fixes a minor bug where errors in ~/.jq would be labelled <builtin>. 
+--- + src/builtin.c | 44 +++++++------------------------------------- + src/linker.c | 10 ++++++++++ + 2 files changed, 17 insertions(+), 37 deletions(-) + +--- a/src/builtin.c ++++ b/src/builtin.c +@@ -1706,9 +1706,7 @@ static block bind_bytecoded_builtins(blo + return block_bind(builtins, b, OP_IS_CALL_PSEUDO); + } + +- +- +-static const char* const jq_builtins = ++static const char jq_builtins[] = + /* Include jq-coded builtins */ + #include "src/builtin.inc" + +@@ -1744,45 +1742,17 @@ static block gen_builtin_list(block buil + return BLOCK(builtins, gen_function("builtins", gen_noop(), gen_const(list))); + } + +-static int builtins_bind_one(jq_state *jq, block* bb, const char* code) { +- struct locfile* src; +- src = locfile_init(jq, "", code, strlen(code)); +- block funcs; +- int nerrors = jq_parse_library(src, &funcs); +- if (nerrors == 0) { +- *bb = block_bind(funcs, *bb, OP_IS_CALL_PSEUDO); +- } +- locfile_free(src); +- return nerrors; +-} +- +-static int slurp_lib(jq_state *jq, block* bb) { +- int nerrors = 0; +- char* home = getenv("HOME"); +- if (home) { // silently ignore no $HOME +- jv filename = jv_string_append_str(jv_string(home), "/.jq"); +- jv data = jv_load_file(jv_string_value(filename), 1); +- if (jv_is_valid(data)) { +- nerrors = builtins_bind_one(jq, bb, jv_string_value(data) ); +- } +- jv_free(filename); +- jv_free(data); +- } +- return nerrors; +-} +- + int builtins_bind(jq_state *jq, block* bb) { +- block builtins = gen_noop(); +- int nerrors = slurp_lib(jq, bb); +- if (nerrors) { +- block_free(*bb); +- return nerrors; +- } +- nerrors = builtins_bind_one(jq, &builtins, jq_builtins); ++ block builtins; ++ struct locfile* src = locfile_init(jq, "", jq_builtins, sizeof(jq_builtins)-1); ++ int nerrors = jq_parse_library(src, &builtins); + assert(!nerrors); ++ locfile_free(src); ++ + builtins = bind_bytecoded_builtins(builtins); + builtins = gen_cbinding(function_list, sizeof(function_list)/sizeof(function_list[0]), builtins); + builtins = 
gen_builtin_list(builtins); ++ + *bb = block_bind(builtins, *bb, OP_IS_CALL_PSEUDO); + *bb = block_drop_unreferenced(*bb); + return nerrors; +--- a/src/linker.c ++++ b/src/linker.c +@@ -387,6 +387,16 @@ int load_program(jq_state *jq, struct lo + if (nerrors) + return nerrors; + ++ char* home = getenv("HOME"); ++ if (home) { // silently ignore no $HOME ++ /* Import ~/.jq as a library named "" found in $HOME */ ++ block import = gen_import_meta(gen_import("", NULL, 0), ++ gen_const(JV_OBJECT( ++ jv_string("optional"), jv_true(), ++ jv_string("search"), jv_string(home)))); ++ program = BLOCK(import, program); ++ } ++ + nerrors = process_dependencies(jq, jq_get_jq_origin(jq), jq_get_prog_origin(jq), &program, &lib_state); + block libs = gen_noop(); + for (uint64_t i = 0; i < lib_state.ct; ++i) { diff --git a/utils/jq/patches/0009-Make-builtin-binding-fast-again-by-binding-only-refe.patch b/utils/jq/patches/0009-Make-builtin-binding-fast-again-by-binding-only-refe.patch new file mode 100644 index 0000000000..0731e0f520 --- /dev/null +++ b/utils/jq/patches/0009-Make-builtin-binding-fast-again-by-binding-only-refe.patch @@ -0,0 +1,170 @@ +From 916c12fb593005771a6ce098f5a7da4dec0051d1 Mon Sep 17 00:00:00 2001 +From: Muh Muhten +Date: Wed, 20 Feb 2019 01:48:56 -0500 +Subject: [PATCH 9/9] Make builtin binding fast again by binding only + referenced symbols + +Avoid doing the internal binding of top-level symbols in the parser, +leaving that work to be done in a post-processing step. For builtins, +this lets us do a reference-aware bind step (block_bind_incremental) +*after* generating builtins/0. + +Libraries are a bit trickier since they may be bound multiple times, so +instead of thinking through the implications I added (block_bind_self) +to resolve all internal symbols immediately. 
+--- + src/builtin.c | 4 ++-- + src/compile.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++---- + src/compile.h | 3 ++- + src/linker.c | 1 + + src/parser.c | 2 +- + src/parser.y | 2 +- + 6 files changed, 54 insertions(+), 9 deletions(-) + +--- a/src/builtin.c ++++ b/src/builtin.c +@@ -1703,7 +1703,7 @@ static block bind_bytecoded_builtins(blo + BLOCK(gen_param("start"), gen_param("end")), + range)); + } +- return block_bind(builtins, b, OP_IS_CALL_PSEUDO); ++ return BLOCK(builtins, b); + } + + static const char jq_builtins[] = +@@ -1753,7 +1753,7 @@ int builtins_bind(jq_state *jq, block* b + builtins = gen_cbinding(function_list, sizeof(function_list)/sizeof(function_list[0]), builtins); + builtins = gen_builtin_list(builtins); + +- *bb = block_bind(builtins, *bb, OP_IS_CALL_PSEUDO); ++ *bb = block_bind_incremental(builtins, *bb, OP_IS_CALL_PSEUDO); + *bb = block_drop_unreferenced(*bb); + return nerrors; + } +--- a/src/compile.c ++++ b/src/compile.c +@@ -222,8 +222,9 @@ block gen_op_unbound(opcode op, const ch + + block gen_op_var_fresh(opcode op, const char* name) { + assert(opcode_describe(op)->flags & OP_HAS_VARIABLE); +- return block_bind(gen_op_unbound(op, name), +- gen_noop(), OP_HAS_VARIABLE); ++ block b = gen_op_unbound(op, name); ++ b.first->bound_by = b.first; ++ return b; + } + + block gen_op_bound(opcode op, block binder) { +@@ -382,7 +383,7 @@ static int block_bind_each(block binder, + return nrefs; + } + +-block block_bind(block binder, block body, int bindflags) { ++static block block_bind(block binder, block body, int bindflags) { + block_bind_each(binder, body, bindflags); + return block_join(binder, body); + } +@@ -434,6 +435,48 @@ block block_bind_referenced(block binder + return body; + } + ++static inst* block_take_last(block* b) { ++ inst* i = b->last; ++ if (i == 0) ++ return 0; ++ if (i->prev) { ++ i->prev->next = i->next; ++ b->last = i->prev; ++ i->prev = 0; ++ } else { ++ b->first = 0; ++ b->last = 0; ++ } ++ return i; ++} ++ ++// 
Binds a sequence of binders, which *must not* already be bound to each other, ++// to body, throwing away unreferenced defs ++block block_bind_incremental(block binder, block body, int bindflags) { ++ assert(block_has_only_binders(binder, bindflags)); ++ bindflags |= OP_HAS_BINDING; ++ ++ inst* curr; ++ while ((curr = block_take_last(&binder))) { ++ body = block_bind_referenced(inst_block(curr), body, bindflags); ++ } ++ return body; ++} ++ ++block block_bind_self(block binder, int bindflags) { ++ assert(block_has_only_binders(binder, bindflags)); ++ bindflags |= OP_HAS_BINDING; ++ block body = gen_noop(); ++ ++ inst* curr; ++ while ((curr = block_take_last(&binder))) { ++ block b = inst_block(curr); ++ block_bind_subblock(b, body, bindflags, 0); ++ body = BLOCK(b, body); ++ } ++ return body; ++} ++ + static void block_mark_referenced(block body) { + int saw_top = 0; + for (inst* i = body.last; i; i = i->prev) { +@@ -1074,7 +1117,7 @@ block gen_cbinding(const struct cfunctio + i->imm.cfunc = &cfunctions[cfunc]; + i->symbol = strdup(i->imm.cfunc->name); + i->any_unbound = 0; +- code = block_bind(inst_block(i), code, OP_IS_CALL_PSEUDO); ++ code = BLOCK(inst_block(i), code); + } + return code; + } +--- a/src/compile.h ++++ b/src/compile.h +@@ -72,9 +72,10 @@ int block_has_only_binders(block, int bi + int block_has_main(block); + int block_is_funcdef(block b); + int block_is_single(block b); +-block block_bind(block binder, block body, int bindflags); + block block_bind_library(block binder, block body, int bindflags, const char* libname); + block block_bind_referenced(block binder, block body, int bindflags); ++block block_bind_incremental(block binder, block body, int bindflags); ++block block_bind_self(block binder, int bindflags); + block block_drop_unreferenced(block body); + + jv block_take_imports(block* body); +--- a/src/linker.c ++++ b/src/linker.c +@@ -336,6 +336,7 @@ static int load_library(jq_state *jq, jv + jv_string(dirname(lib_origin)), + &program, 
lib_state); + free(lib_origin); ++ program = block_bind_self(program, OP_IS_CALL_PSEUDO); + } + } + state_idx = lib_state->ct++; +--- a/src/parser.c ++++ b/src/parser.c +@@ -2425,7 +2425,7 @@ yyreduce: + case 9: + #line 333 "src/parser.y" /* yacc.c:1646 */ + { +- (yyval.blk) = block_bind((yyvsp[-1].blk), (yyvsp[0].blk), OP_IS_CALL_PSEUDO); ++ (yyval.blk) = block_join((yyvsp[-1].blk), (yyvsp[0].blk)); + } + #line 2431 "src/parser.c" /* yacc.c:1646 */ + break; +--- a/src/parser.y ++++ b/src/parser.y +@@ -331,7 +331,7 @@ FuncDefs: + $$ = gen_noop(); + } | + FuncDef FuncDefs { +- $$ = block_bind($1, $2, OP_IS_CALL_PSEUDO); ++ $$ = block_join($1, $2); + } + + Exp: -- 2.30.2