jq: Backport a series of upstream commits to fix jq-1.6's terrible startup performance
author François Goudal <francois.goudal@airmont.com>
Wed, 18 Aug 2021 21:01:23 +0000 (23:01 +0200)
committer François Goudal <francois.goudal@airmont.com>
Mon, 23 Aug 2021 09:15:44 +0000 (11:15 +0200)
Signed-off-by: François Goudal <francois.goudal@airmont.com>
utils/jq/Makefile
utils/jq/patches/0001-Improve-linking-time-by-marking-subtrees-with-unboun.patch [new file with mode: 0644]
utils/jq/patches/0002-Reimplement-block_drop_unreferenced-in-linear-time.patch [new file with mode: 0644]
utils/jq/patches/0003-Simplify-definition-of-block_bind_referenced.patch [new file with mode: 0644]
utils/jq/patches/0004-Pass-on-the-error-message-when-rel_path-is-invalid.patch [new file with mode: 0644]
utils/jq/patches/0005-Catch-.-as-the-first-component-of-a-module-path.patch [new file with mode: 0644]
utils/jq/patches/0006-Replace-TOP-before-imports-special-case-with-assert.patch [new file with mode: 0644]
utils/jq/patches/0007-Add-import-metadata-key-optional.patch [new file with mode: 0644]
utils/jq/patches/0008-Load-.jq-as-a-library-instead-of-with-builtins.patch [new file with mode: 0644]
utils/jq/patches/0009-Make-builtin-binding-fast-again-by-binding-only-refe.patch [new file with mode: 0644]

index d5ffdd0916c45d463310f3f5faf8200418f36703..402e9983cc458939a29d78e3be26410baf695f0d 100644 (file)
@@ -9,7 +9,7 @@ include $(TOPDIR)/rules.mk
 
 PKG_NAME:=jq
 PKG_VERSION:=1.6
-PKG_RELEASE:=1
+PKG_RELEASE:=2
 PKG_LICENSE:=BSD
 
 PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz
diff --git a/utils/jq/patches/0001-Improve-linking-time-by-marking-subtrees-with-unboun.patch b/utils/jq/patches/0001-Improve-linking-time-by-marking-subtrees-with-unboun.patch
new file mode 100644 (file)
index 0000000..f1d704d
--- /dev/null
@@ -0,0 +1,119 @@
+From a949ffe9554b5af5614d31b795805f56939a031b Mon Sep 17 00:00:00 2001
+From: Muh Muhten <muh.muhten@gmail.com>
+Date: Fri, 8 Feb 2019 16:52:04 -0500
+Subject: [PATCH 1/9] Improve linking time by marking subtrees with unbound
+ symbols
+
+---
+ src/compile.c | 29 +++++++++++++++++++++++++----
+ 1 file changed, 25 insertions(+), 4 deletions(-)
+
+--- a/src/compile.c
++++ b/src/compile.c
+@@ -49,9 +49,10 @@ struct inst {
+   // Unbound instructions (references to other things that may or may not
+   // exist) are created by "gen_foo_unbound", and bindings are created by
+   // block_bind(definition, body), which binds all instructions in
+-  // body which are unboudn and refer to "definition" by name.
++  // body which are unbound and refer to "definition" by name.
+   struct inst* bound_by;
+   char* symbol;
++  int any_unbound;
+   int nformals;
+   int nactuals;
+@@ -73,6 +74,7 @@ static inst* inst_new(opcode op) {
+   i->bytecode_pos = -1;
+   i->bound_by = 0;
+   i->symbol = 0;
++  i->any_unbound = 0;
+   i->nformals = -1;
+   i->nactuals = -1;
+   i->subfn = gen_noop();
+@@ -156,6 +158,7 @@ block gen_const_global(jv constant, cons
+   inst* i = inst_new(STORE_GLOBAL);
+   i->imm.constant = constant;
+   i->symbol = strdup(name);
++  i->any_unbound = 0;
+   return inst_block(i);
+ }
+@@ -211,6 +214,7 @@ block gen_op_unbound(opcode op, const ch
+   assert(opcode_describe(op)->flags & OP_HAS_BINDING);
+   inst* i = inst_new(op);
+   i->symbol = strdup(name);
++  i->any_unbound = 1;
+   return inst_block(i);
+ }
+@@ -224,6 +228,7 @@ block gen_op_bound(opcode op, block bind
+   assert(block_is_single(binder));
+   block b = gen_op_unbound(op, binder.first->symbol);
+   b.first->bound_by = binder.first;
++  b.first->any_unbound = 0;
+   return b;
+ }
+@@ -324,7 +329,7 @@ static int block_count_refs(block binder
+   return nrefs;
+ }
+-static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) {
++static int block_bind_subblock_inner(int* any_unbound, block binder, block body, int bindflags, int break_distance) {
+   assert(block_is_single(binder));
+   assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD));
+   assert(binder.first->symbol);
+@@ -336,6 +341,9 @@ static int block_bind_subblock(block bin
+     binder.first->nformals = block_count_formals(binder);
+   int nrefs = 0;
+   for (inst* i = body.first; i; i = i->next) {
++    if (i->any_unbound == 0)
++      continue;
++
+     int flags = opcode_describe(i->op)->flags;
+     if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 &&
+         (!strcmp(i->symbol, binder.first->symbol) ||
+@@ -357,14 +365,25 @@ static int block_bind_subblock(block bin
+       // a break whenever we come across a STOREV of *anonlabel...
+       break_distance++;
+     }
++
++    i->any_unbound = (i->symbol && !i->bound_by);
++
+     // binding recurses into closures
+-    nrefs += block_bind_subblock(binder, i->subfn, bindflags, break_distance);
++    nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->subfn, bindflags, break_distance);
+     // binding recurses into argument list
+-    nrefs += block_bind_subblock(binder, i->arglist, bindflags, break_distance);
++    nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->arglist, bindflags, break_distance);
++
++    if (i->any_unbound)
++      *any_unbound = 1;
+   }
+   return nrefs;
+ }
++static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) {
++  int any_unbound;
++  return block_bind_subblock_inner(&any_unbound, binder, body, bindflags, break_distance);
++}
++
+ static int block_bind_each(block binder, block body, int bindflags) {
+   assert(block_has_only_binders(binder, bindflags));
+   bindflags |= OP_HAS_BINDING;
+@@ -550,6 +569,7 @@ block gen_function(const char* name, blo
+   }
+   i->subfn = body;
+   i->symbol = strdup(name);
++  i->any_unbound = -1;
+   i->arglist = formals;
+   block b = inst_block(i);
+   block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
+@@ -1081,6 +1101,7 @@ block gen_cbinding(const struct cfunctio
+     inst* i = inst_new(CLOSURE_CREATE_C);
+     i->imm.cfunc = &cfunctions[cfunc];
+     i->symbol = strdup(i->imm.cfunc->name);
++    i->any_unbound = 0;
+     code = block_bind(inst_block(i), code, OP_IS_CALL_PSEUDO);
+   }
+   return code;
diff --git a/utils/jq/patches/0002-Reimplement-block_drop_unreferenced-in-linear-time.patch b/utils/jq/patches/0002-Reimplement-block_drop_unreferenced-in-linear-time.patch
new file mode 100644 (file)
index 0000000..3df0d00
--- /dev/null
@@ -0,0 +1,85 @@
+From aab54373e9406ee2a154b8d6166b3045aa3484ee Mon Sep 17 00:00:00 2001
+From: Muh Muhten <muh.muhten@gmail.com>
+Date: Sat, 9 Feb 2019 17:24:18 -0500
+Subject: [PATCH 2/9] Reimplement block_drop_unreferenced in linear time
+
+---
+ src/compile.c | 50 +++++++++++++++++++++++++++++---------------------
+ 1 file changed, 29 insertions(+), 21 deletions(-)
+
+--- a/src/compile.c
++++ b/src/compile.c
+@@ -53,6 +53,7 @@ struct inst {
+   struct inst* bound_by;
+   char* symbol;
+   int any_unbound;
++  int referenced;
+   int nformals;
+   int nactuals;
+@@ -75,6 +76,7 @@ static inst* inst_new(opcode op) {
+   i->bound_by = 0;
+   i->symbol = 0;
+   i->any_unbound = 0;
++  i->referenced = 0;
+   i->nformals = -1;
+   i->nactuals = -1;
+   i->subfn = gen_noop();
+@@ -465,30 +467,36 @@ block block_bind_referenced(block binder
+   return block_join(refd, body);
+ }
++static void block_mark_referenced(block body) {
++  int saw_top = 0;
++  for (inst* i = body.last; i; i = i->prev) {
++    if (saw_top && i->bound_by == i && !i->referenced)
++      continue;
++    if (i->op == TOP) {
++      saw_top = 1;
++    }
++    if (i->bound_by) {
++      i->bound_by->referenced = 1;
++    }
++
++    block_mark_referenced(i->arglist);
++    block_mark_referenced(i->subfn);
++  }
++}
++
+ block block_drop_unreferenced(block body) {
+-  inst* curr;
++  block_mark_referenced(body);
++
+   block refd = gen_noop();
+-  block unrefd = gen_noop();
+-  int drop;
+-  do {
+-    drop = 0;
+-    while ((curr = block_take(&body)) && curr->op != TOP) {
+-      block b = inst_block(curr);
+-      if (block_count_refs(b,refd) + block_count_refs(b,body) == 0) {
+-        unrefd = BLOCK(unrefd, b);
+-        drop++;
+-      } else {
+-        refd = BLOCK(refd, b);
+-      }
+-    }
+-    if (curr && curr->op == TOP) {
+-      body = BLOCK(inst_block(curr),body);
++  inst* curr;
++  while ((curr = block_take(&body))) {
++    if (curr->bound_by == curr && !curr->referenced) {
++      inst_free(curr);
++    } else {
++      refd = BLOCK(inst_block(curr), refd);
+     }
+-    body = BLOCK(refd, body);
+-    refd = gen_noop();
+-  } while (drop != 0);
+-  block_free(unrefd);
+-  return body;
++  }
++  return refd;
+ }
+ jv block_take_imports(block* body) {
diff --git a/utils/jq/patches/0003-Simplify-definition-of-block_bind_referenced.patch b/utils/jq/patches/0003-Simplify-definition-of-block_bind_referenced.patch
new file mode 100644 (file)
index 0000000..d710e4b
--- /dev/null
@@ -0,0 +1,78 @@
+From e6676ebbd2ab0a6283d96c797dbe93552c1a222c Mon Sep 17 00:00:00 2001
+From: Muh Muhten <muh.muhten@gmail.com>
+Date: Mon, 18 Feb 2019 21:00:59 -0500
+Subject: [PATCH 3/9] Simplify definition of block_bind_referenced
+
+---
+ src/compile.c | 49 ++++++++-----------------------------------------
+ 1 file changed, 8 insertions(+), 41 deletions(-)
+
+--- a/src/compile.c
++++ b/src/compile.c
+@@ -317,20 +317,6 @@ static int block_count_actuals(block b)
+   return args;
+ }
+-static int block_count_refs(block binder, block body) {
+-  int nrefs = 0;
+-  for (inst* i = body.first; i; i = i->next) {
+-    if (i != binder.first && i->bound_by == binder.first) {
+-      nrefs++;
+-    }
+-    // counting recurses into closures
+-    nrefs += block_count_refs(binder, i->subfn);
+-    // counting recurses into argument list
+-    nrefs += block_count_refs(binder, i->arglist);
+-  }
+-  return nrefs;
+-}
+-
+ static int block_bind_subblock_inner(int* any_unbound, block binder, block body, int bindflags, int break_distance) {
+   assert(block_is_single(binder));
+   assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD));
+@@ -434,37 +420,18 @@ block block_bind_library(block binder, b
+   return body; // We don't return a join because we don't want those sticking around...
+ }
+-// Bind binder to body and throw away any defs in binder not referenced
+-// (directly or indirectly) from body.
++// Bind binder to body, then throw it away if not referenced.
+ block block_bind_referenced(block binder, block body, int bindflags) {
++  assert(block_is_single(binder));
+   assert(block_has_only_binders(binder, bindflags));
+   bindflags |= OP_HAS_BINDING;
+-  block refd = gen_noop();
+-  block unrefd = gen_noop();
+-  int nrefs;
+-  for (int last_kept = 0, kept = 0; ; ) {
+-    for (inst* curr; (curr = block_take(&binder));) {
+-      block b = inst_block(curr);
+-      nrefs = block_bind_each(b, body, bindflags);
+-      // Check if this binder is referenced from any of the ones we
+-      // already know are referenced by body.
+-      nrefs += block_count_refs(b, refd);
+-      nrefs += block_count_refs(b, body);
+-      if (nrefs) {
+-        refd = BLOCK(refd, b);
+-        kept++;
+-      } else {
+-        unrefd = BLOCK(unrefd, b);
+-      }
+-    }
+-    if (kept == last_kept)
+-      break;
+-    last_kept = kept;
+-    binder = unrefd;
+-    unrefd = gen_noop();
++
++  if (block_bind_subblock(binder, body, bindflags, 0) == 0) {
++    block_free(binder);
++  } else {
++    body = BLOCK(binder, body);
+   }
+-  block_free(unrefd);
+-  return block_join(refd, body);
++  return body;
+ }
+ static void block_mark_referenced(block body) {
diff --git a/utils/jq/patches/0004-Pass-on-the-error-message-when-rel_path-is-invalid.patch b/utils/jq/patches/0004-Pass-on-the-error-message-when-rel_path-is-invalid.patch
new file mode 100644 (file)
index 0000000..39747cf
--- /dev/null
@@ -0,0 +1,40 @@
+From 2e3dbb884199bba6cc07345f6d394f1ac53465ac Mon Sep 17 00:00:00 2001
+From: Muh Muhten <muh.muhten@gmail.com>
+Date: Tue, 19 Feb 2019 00:34:04 -0500
+Subject: [PATCH 4/9] Pass on the error message when rel_path is invalid
+
+"Module path must be a string" is not a useful error message when the
+reason the module path isn't a string is that the string was already
+replaced by an invalid value carrying an error message of its own.
+
+Also fixes a few memory leaks on early exits.
+---
+ src/linker.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+--- a/src/linker.c
++++ b/src/linker.c
+@@ -138,10 +138,20 @@ static jv jv_basename(jv name) {
+ // Asummes validated relative path to module
+ static jv find_lib(jq_state *jq, jv rel_path, jv search, const char *suffix, jv jq_origin, jv lib_origin) {
+-  if (jv_get_kind(search) != JV_KIND_ARRAY)
+-    return jv_invalid_with_msg(jv_string_fmt("Module search path must be an array"));
+-  if (jv_get_kind(rel_path) != JV_KIND_STRING)
++  if (!jv_is_valid(rel_path)) {
++    jv_free(search);
++    return rel_path;
++  }
++  if (jv_get_kind(rel_path) != JV_KIND_STRING) {
++    jv_free(rel_path);
++    jv_free(search);
+     return jv_invalid_with_msg(jv_string_fmt("Module path must be a string"));
++  }
++  if (jv_get_kind(search) != JV_KIND_ARRAY) {
++    jv_free(rel_path);
++    jv_free(search);
++    return jv_invalid_with_msg(jv_string_fmt("Module search path must be an array"));
++  }
+   struct stat st;
+   int ret;
diff --git a/utils/jq/patches/0005-Catch-.-as-the-first-component-of-a-module-path.patch b/utils/jq/patches/0005-Catch-.-as-the-first-component-of-a-module-path.patch
new file mode 100644 (file)
index 0000000..8c23bf4
--- /dev/null
@@ -0,0 +1,50 @@
+From d0fe86177427e0c3bc2cec1436d74472e4b618dd Mon Sep 17 00:00:00 2001
+From: Muh Muhten <muh.muhten@gmail.com>
+Date: Tue, 19 Feb 2019 00:35:40 -0500
+Subject: [PATCH 5/9] Catch .. as the first component of a module path
+
+Only the second and subsequent path components were being checked, which
+I guess is theoretically security-relevant.
+
+There's no apparent point to reconstructing the path after splitting it
+by adding /s back in, either.
+---
+ src/linker.c | 9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+--- a/src/linker.c
++++ b/src/linker.c
+@@ -98,12 +98,9 @@ static jv validate_relpath(jv name) {
+     return res;
+   }
+   jv components = jv_string_split(jv_copy(name), jv_string("/"));
+-  jv rp = jv_array_get(jv_copy(components), 0);
+-  components = jv_array_slice(components, 1, jv_array_length(jv_copy(components)));
+   jv_array_foreach(components, i, x) {
+     if (!strcmp(jv_string_value(x), "..")) {
+       jv_free(x);
+-      jv_free(rp);
+       jv_free(components);
+       jv res = jv_invalid_with_msg(jv_string_fmt("Relative paths to modules may not traverse to parent directories (%s)", s));
+       jv_free(name);
+@@ -111,18 +108,16 @@ static jv validate_relpath(jv name) {
+     }
+     if (i > 0 && jv_equal(jv_copy(x), jv_array_get(jv_copy(components), i - 1))) {
+       jv_free(x);
+-      jv_free(rp);
+       jv_free(components);
+       jv res = jv_invalid_with_msg(jv_string_fmt("module names must not have equal consecutive components: %s",
+                                                  jv_string_value(name)));
+       jv_free(name);
+       return res;
+     }
+-    rp = jv_string_concat(rp, jv_string_concat(jv_string("/"), x));
++    jv_free(x);
+   }
+   jv_free(components);
+-  jv_free(name);
+-  return rp;
++  return name;
+ }
+ // Assumes name has been validated
diff --git a/utils/jq/patches/0006-Replace-TOP-before-imports-special-case-with-assert.patch b/utils/jq/patches/0006-Replace-TOP-before-imports-special-case-with-assert.patch
new file mode 100644 (file)
index 0000000..ae2c406
--- /dev/null
@@ -0,0 +1,37 @@
+From a114b871e460ef2ddcf7698bc6b18651c976626a Mon Sep 17 00:00:00 2001
+From: Muh Muhten <muh.muhten@gmail.com>
+Date: Tue, 19 Feb 2019 00:14:53 -0500
+Subject: [PATCH 6/9] Replace TOP-before-imports special case with assert
+
+The case isn't actually possible afaict.
+---
+ src/compile.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+--- a/src/compile.c
++++ b/src/compile.c
+@@ -469,10 +469,10 @@ block block_drop_unreferenced(block body
+ jv block_take_imports(block* body) {
+   jv imports = jv_array();
+-  inst* top = NULL;
+-  if (body->first && body->first->op == TOP) {
+-    top = block_take(body);
+-  }
++  /* Parser should never generate TOP before imports */
++  assert(!(body->first && body->first->op == TOP && body->first->next &&
++        (body->first->next->op == MODULEMETA || body->first->next->op == DEPS)));
++
+   while (body->first && (body->first->op == MODULEMETA || body->first->op == DEPS)) {
+     inst* dep = block_take(body);
+     if (dep->op == DEPS) {
+@@ -480,9 +480,6 @@ jv block_take_imports(block* body) {
+     }
+     inst_free(dep);
+   }
+-  if (top) {
+-    *body = block_join(inst_block(top),*body);
+-  }
+   return imports;
+ }
diff --git a/utils/jq/patches/0007-Add-import-metadata-key-optional.patch b/utils/jq/patches/0007-Add-import-metadata-key-optional.patch
new file mode 100644 (file)
index 0000000..8b1d805
--- /dev/null
@@ -0,0 +1,39 @@
+From 90b92d8c73446bb50eee14ca8d88c5224002001a Mon Sep 17 00:00:00 2001
+From: Muh Muhten <muh.muhten@gmail.com>
+Date: Sun, 15 Oct 2017 01:57:17 -0400
+Subject: [PATCH 7/9] Add import metadata key "optional"
+
+A library marked optional is imported if found, but silently skipped if missing.
+This is the desired semantic for the auto-include at ~/.jq
+---
+ src/linker.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/src/linker.c
++++ b/src/linker.c
+@@ -246,6 +246,9 @@ static int process_dependencies(jq_state
+     jv v = jv_object_get(jv_copy(dep), jv_string("raw"));
+     if (jv_get_kind(v) == JV_KIND_TRUE)
+       raw = 1;
++    int optional = 0;
++    if (jv_get_kind(jv_object_get(jv_copy(dep), jv_string("optional"))) == JV_KIND_TRUE)
++      optional = 1;
+     jv_free(v);
+     jv relpath = validate_relpath(jv_object_get(jv_copy(dep), jv_string("relpath")));
+     jv as = jv_object_get(jv_copy(dep), jv_string("as"));
+@@ -259,10 +262,14 @@ static int process_dependencies(jq_state
+     jv resolved = find_lib(jq, relpath, search, is_data ? ".json" : ".jq", jv_copy(jq_origin), jv_copy(lib_origin));
+     // XXX ...move the rest of this into a callback.
+     if (!jv_is_valid(resolved)) {
++      jv_free(as);
++      if (optional) {
++        jv_free(resolved);
++        continue;
++      }
+       jv emsg = jv_invalid_get_msg(resolved);
+       jq_report_error(jq, jv_string_fmt("jq: error: %s\n",jv_string_value(emsg)));
+       jv_free(emsg);
+-      jv_free(as);
+       jv_free(deps);
+       jv_free(jq_origin);
+       jv_free(lib_origin);
diff --git a/utils/jq/patches/0008-Load-.jq-as-a-library-instead-of-with-builtins.patch b/utils/jq/patches/0008-Load-.jq-as-a-library-instead-of-with-builtins.patch
new file mode 100644 (file)
index 0000000..4d93655
--- /dev/null
@@ -0,0 +1,99 @@
+From 4c5a08b9e01ebfce5c8914dd82c1722737bbecab Mon Sep 17 00:00:00 2001
+From: Muh Muhten <muh.muhten@gmail.com>
+Date: Tue, 19 Feb 2019 00:39:34 -0500
+Subject: [PATCH 8/9] Load ~/.jq as a library instead of with builtins
+
+Remove the special code which loads ~/.jq in builtin.c, and instead glue
+an optional include which points to the same file onto the main program
+in linker.c.
+
+Fixes a minor bug where errors in ~/.jq would be labelled <builtin>.
+---
+ src/builtin.c | 44 +++++++-------------------------------------
+ src/linker.c  | 10 ++++++++++
+ 2 files changed, 17 insertions(+), 37 deletions(-)
+
+--- a/src/builtin.c
++++ b/src/builtin.c
+@@ -1706,9 +1706,7 @@ static block bind_bytecoded_builtins(blo
+   return block_bind(builtins, b, OP_IS_CALL_PSEUDO);
+ }
+-
+-
+-static const char* const jq_builtins =
++static const char jq_builtins[] =
+ /* Include jq-coded builtins */
+ #include "src/builtin.inc"
+@@ -1744,45 +1742,17 @@ static block gen_builtin_list(block buil
+   return BLOCK(builtins, gen_function("builtins", gen_noop(), gen_const(list)));
+ }
+-static int builtins_bind_one(jq_state *jq, block* bb, const char* code) {
+-  struct locfile* src;
+-  src = locfile_init(jq, "<builtin>", code, strlen(code));
+-  block funcs;
+-  int nerrors = jq_parse_library(src, &funcs);
+-  if (nerrors == 0) {
+-    *bb = block_bind(funcs, *bb, OP_IS_CALL_PSEUDO);
+-  }
+-  locfile_free(src);
+-  return nerrors;
+-}
+-
+-static int slurp_lib(jq_state *jq, block* bb) {
+-  int nerrors = 0;
+-  char* home = getenv("HOME");
+-  if (home) {    // silently ignore no $HOME
+-    jv filename = jv_string_append_str(jv_string(home), "/.jq");
+-    jv data = jv_load_file(jv_string_value(filename), 1);
+-    if (jv_is_valid(data)) {
+-      nerrors = builtins_bind_one(jq, bb, jv_string_value(data) );
+-    }
+-    jv_free(filename);
+-    jv_free(data);
+-  }
+-  return nerrors;
+-}
+-
+ int builtins_bind(jq_state *jq, block* bb) {
+-  block builtins = gen_noop();
+-  int nerrors = slurp_lib(jq, bb);
+-  if (nerrors) {
+-    block_free(*bb);
+-    return nerrors;
+-  }
+-  nerrors = builtins_bind_one(jq, &builtins, jq_builtins);
++  block builtins;
++  struct locfile* src = locfile_init(jq, "<builtin>", jq_builtins, sizeof(jq_builtins)-1);
++  int nerrors = jq_parse_library(src, &builtins);
+   assert(!nerrors);
++  locfile_free(src);
++
+   builtins = bind_bytecoded_builtins(builtins);
+   builtins = gen_cbinding(function_list, sizeof(function_list)/sizeof(function_list[0]), builtins);
+   builtins = gen_builtin_list(builtins);
++
+   *bb = block_bind(builtins, *bb, OP_IS_CALL_PSEUDO);
+   *bb = block_drop_unreferenced(*bb);
+   return nerrors;
+--- a/src/linker.c
++++ b/src/linker.c
+@@ -387,6 +387,16 @@ int load_program(jq_state *jq, struct lo
+   if (nerrors)
+     return nerrors;
++  char* home = getenv("HOME");
++  if (home) {    // silently ignore no $HOME
++    /* Import ~/.jq as a library named "" found in $HOME */
++    block import = gen_import_meta(gen_import("", NULL, 0),
++        gen_const(JV_OBJECT(
++            jv_string("optional"), jv_true(),
++            jv_string("search"), jv_string(home))));
++    program = BLOCK(import, program);
++  }
++
+   nerrors = process_dependencies(jq, jq_get_jq_origin(jq), jq_get_prog_origin(jq), &program, &lib_state);
+   block libs = gen_noop();
+   for (uint64_t i = 0; i < lib_state.ct; ++i) {
diff --git a/utils/jq/patches/0009-Make-builtin-binding-fast-again-by-binding-only-refe.patch b/utils/jq/patches/0009-Make-builtin-binding-fast-again-by-binding-only-refe.patch
new file mode 100644 (file)
index 0000000..0731e0f
--- /dev/null
@@ -0,0 +1,170 @@
+From 916c12fb593005771a6ce098f5a7da4dec0051d1 Mon Sep 17 00:00:00 2001
+From: Muh Muhten <muh.muhten@gmail.com>
+Date: Wed, 20 Feb 2019 01:48:56 -0500
+Subject: [PATCH 9/9] Make builtin binding fast again by binding only
+ referenced symbols
+
+Avoid doing the internal binding of top-level symbols in the parser,
+leaving that work to be done in a post-processing step. For builtins,
+this lets us do a reference-aware bind step (block_bind_incremental)
+*after* generating builtins/0.
+
+Libraries are a bit trickier since they may be bound multiple times, so
+instead of thinking through the implications I added (block_bind_self)
+to resolve all internal symbols immediately.
+---
+ src/builtin.c |  4 ++--
+ src/compile.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++----
+ src/compile.h |  3 ++-
+ src/linker.c  |  1 +
+ src/parser.c  |  2 +-
+ src/parser.y  |  2 +-
+ 6 files changed, 54 insertions(+), 9 deletions(-)
+
+--- a/src/builtin.c
++++ b/src/builtin.c
+@@ -1703,7 +1703,7 @@ static block bind_bytecoded_builtins(blo
+                                             BLOCK(gen_param("start"), gen_param("end")),
+                                             range));
+   }
+-  return block_bind(builtins, b, OP_IS_CALL_PSEUDO);
++  return BLOCK(builtins, b);
+ }
+ static const char jq_builtins[] =
+@@ -1753,7 +1753,7 @@ int builtins_bind(jq_state *jq, block* b
+   builtins = gen_cbinding(function_list, sizeof(function_list)/sizeof(function_list[0]), builtins);
+   builtins = gen_builtin_list(builtins);
+-  *bb = block_bind(builtins, *bb, OP_IS_CALL_PSEUDO);
++  *bb = block_bind_incremental(builtins, *bb, OP_IS_CALL_PSEUDO);
+   *bb = block_drop_unreferenced(*bb);
+   return nerrors;
+ }
+--- a/src/compile.c
++++ b/src/compile.c
+@@ -222,8 +222,9 @@ block gen_op_unbound(opcode op, const ch
+ block gen_op_var_fresh(opcode op, const char* name) {
+   assert(opcode_describe(op)->flags & OP_HAS_VARIABLE);
+-  return block_bind(gen_op_unbound(op, name),
+-                    gen_noop(), OP_HAS_VARIABLE);
++  block b = gen_op_unbound(op, name);
++  b.first->bound_by = b.first;
++  return b;
+ }
+ block gen_op_bound(opcode op, block binder) {
+@@ -382,7 +383,7 @@ static int block_bind_each(block binder,
+   return nrefs;
+ }
+-block block_bind(block binder, block body, int bindflags) {
++static block block_bind(block binder, block body, int bindflags) {
+   block_bind_each(binder, body, bindflags);
+   return block_join(binder, body);
+ }
+@@ -434,6 +435,48 @@ block block_bind_referenced(block binder
+   return body;
+ }
++static inst* block_take_last(block* b) {
++  inst* i = b->last;
++  if (i == 0)
++    return 0;
++  if (i->prev) {
++    i->prev->next = i->next;
++    b->last = i->prev;
++    i->prev = 0;
++  } else {
++    b->first = 0;
++    b->last = 0;
++  }
++  return i;
++}
++
++// Binds a sequence of binders, which *must not* already be bound to each other,
++// to body, throwing away unreferenced defs
++block block_bind_incremental(block binder, block body, int bindflags) {
++  assert(block_has_only_binders(binder, bindflags));
++  bindflags |= OP_HAS_BINDING;
++
++  inst* curr;
++  while ((curr = block_take_last(&binder))) {
++    body = block_bind_referenced(inst_block(curr), body, bindflags);
++  }
++  return body;
++}
++
++block block_bind_self(block binder, int bindflags) {
++  assert(block_has_only_binders(binder, bindflags));
++  bindflags |= OP_HAS_BINDING;
++  block body = gen_noop();
++
++  inst* curr;
++  while ((curr = block_take_last(&binder))) {
++    block b = inst_block(curr);
++    block_bind_subblock(b, body, bindflags, 0);
++    body = BLOCK(b, body);
++  }
++  return body;
++}
++
+ static void block_mark_referenced(block body) {
+   int saw_top = 0;
+   for (inst* i = body.last; i; i = i->prev) {
+@@ -1074,7 +1117,7 @@ block gen_cbinding(const struct cfunctio
+     i->imm.cfunc = &cfunctions[cfunc];
+     i->symbol = strdup(i->imm.cfunc->name);
+     i->any_unbound = 0;
+-    code = block_bind(inst_block(i), code, OP_IS_CALL_PSEUDO);
++    code = BLOCK(inst_block(i), code);
+   }
+   return code;
+ }
+--- a/src/compile.h
++++ b/src/compile.h
+@@ -72,9 +72,10 @@ int block_has_only_binders(block, int bi
+ int block_has_main(block);
+ int block_is_funcdef(block b);
+ int block_is_single(block b);
+-block block_bind(block binder, block body, int bindflags);
+ block block_bind_library(block binder, block body, int bindflags, const char* libname);
+ block block_bind_referenced(block binder, block body, int bindflags);
++block block_bind_incremental(block binder, block body, int bindflags);
++block block_bind_self(block binder, int bindflags);
+ block block_drop_unreferenced(block body);
+ jv block_take_imports(block* body);
+--- a/src/linker.c
++++ b/src/linker.c
+@@ -336,6 +336,7 @@ static int load_library(jq_state *jq, jv
+                                       jv_string(dirname(lib_origin)),
+                                       &program, lib_state);
+       free(lib_origin);
++      program = block_bind_self(program, OP_IS_CALL_PSEUDO);
+     }
+   }
+   state_idx = lib_state->ct++;
+--- a/src/parser.c
++++ b/src/parser.c
+@@ -2425,7 +2425,7 @@ yyreduce:
+   case 9:
+ #line 333 "src/parser.y" /* yacc.c:1646  */
+     {
+-  (yyval.blk) = block_bind((yyvsp[-1].blk), (yyvsp[0].blk), OP_IS_CALL_PSEUDO);
++  (yyval.blk) = block_join((yyvsp[-1].blk), (yyvsp[0].blk));
+ }
+ #line 2431 "src/parser.c" /* yacc.c:1646  */
+     break;
+--- a/src/parser.y
++++ b/src/parser.y
+@@ -331,7 +331,7 @@ FuncDefs:
+   $$ = gen_noop();
+ } |
+ FuncDef FuncDefs {
+-  $$ = block_bind($1, $2, OP_IS_CALL_PSEUDO);
++  $$ = block_join($1, $2);
+ }
+ Exp: