|
| 1 | +module tree_query; |
| 2 | +import std.stdio; |
| 3 | +import std.variant; |
| 4 | +import std.string; |
| 5 | +import query; |
| 6 | +import interp; |
/++
 + Extract the query from the input.
 + These forms are supported, where QUERY represents the query itself.
 + - QUERY
 + - {{query: QUERY }}
 + - {{query:QUERY}}
 + - {{[[query]]: QUERY }}
 +
 + Leading and trailing whitespace is allowed around the whole input and
 + around QUERY itself.
 +
 + Note that Roam does not accept {{ query: nor {{ query : -- this function
 + is more lenient and tolerates whitespace between "{{" and the command.
 +
 + Params:
 +   query = the raw text, either a naked query or a "{{...}}" block
 + Returns: the naked query with surrounding whitespace removed
 + Throws: Exception when the input starts with "{{" but does not end with
 +         "}}", or when the command inside the braces is not recognized.
 +/
string extractQuery(string query) {
    import std.string;

    query = strip(query); // Allow leading and trailing whitespace
    if (!query.startsWith("{{")) {
        // Naked query: nothing to unwrap.
        return query;
    }
    if (!query.endsWith("}}")) {
        throw new Exception("Malformed query. Must end with '}}'");
    }

    // Drop the surrounding braces first, then the optional whitespace in front
    // of the command, so that the command offset is always measured on the
    // same string the prefix test was run against.
    auto inner = stripLeft(query[2 .. $ - 2]);
    static immutable commands = ["query:", "[[query]]:"];
    foreach (command; commands) {
        if (inner.startsWith(command)) {
            return inner[command.length .. $].strip;
        }
    }
    // Bad user input is a recoverable condition, hence Exception (not Error).
    throw new Exception("Unrecognized command, command must be 'query:' or '[[query]]:'");
}
unittest {
    import std.exception : assertThrown;

    const nakedQueries = ["{and: [[foo]] [[bar]] }", "{or: [[foo]] [[bar]] }"];
    foreach (nakedQuery; nakedQueries) {
        assert(extractQuery(nakedQuery) == nakedQuery);
        assert(extractQuery("{{query:" ~ nakedQuery ~ "}}") == nakedQuery);
        assert(extractQuery(" {{query: " ~ nakedQuery ~ " }} ") == nakedQuery);
        // Page-reference spelling of the command
        assert(extractQuery("{{[[query]]: " ~ nakedQuery ~ " }}") == nakedQuery);
        // Whitespace between the braces and the command
        assert(extractQuery("{{ query: " ~ nakedQuery ~ " }}") == nakedQuery);
    }
    assertThrown!Exception(extractQuery("{{query: {and: [[foo]] }"));
    assertThrown!Exception(extractQuery("{{embed: [[foo]] }}"));
}
/// Outcome of parsing a boolean query: the page references that were found
/// plus the operator tree built over them.
struct ParsedQuery {
    /// Page references in the order they were encountered, e.g. "[[Hi]]".
    string[] terms;

    /// Root of the operator tree (see `booleanQuery` for how it is filled in).
    Form form;
}
| 53 | + |
/// Placeholder: the `query` argument is currently ignored and a
/// default-initialized ParsedQuery is returned.
ParsedQuery parseQuery(string query) {
    return ParsedQuery.init;
}
| 58 | + |
// TODO: Implement atom dedup
// TODO: Implement support for arbitrary atoms
// TODO: Allocate Form from an array for efficiency
/++
 + Parse a boolean query such as
 + `{and: [[Hi]] {or: [[Blue]] [[White]] } }` into its terms and operator tree.
 +
 + A clause starts with `{and:`, `{or:` or `{not:`, is followed by any number
 + of operands and is closed by `}`. An operand is either a page reference
 + (`[[...]]`) or a nested clause. Text after the closing brace of the
 + outermost clause is ignored.
 +
 + Params:
 +   query    = the naked query text (surrounding whitespace is allowed)
 +   bitshift = running counter; each page reference is given the current
 +              value as the `bitshift` of its ATOM form, after which the
 +              counter is incremented
 + Returns: the collected terms, in order of appearance, and the form tree
 + Throws: Exception on an unknown keyword, an unterminated page reference or
 +         a missing closing brace.
 +/
ParsedQuery booleanQuery(string query, ref ubyte bitshift) {
    ParsedQuery q;
    size_t pos = 0;
    q.form = parseBooleanClause(query, pos, bitshift, q.terms);
    return q;
}

// Parses the single clause starting at query[pos], appends its page
// references to `terms` and leaves `pos` just past the clause's closing brace.
private Form parseBooleanClause(string query, ref size_t pos, ref ubyte bitshift,
        ref string[] terms) {
    debug writeln("subquery", query[pos .. $]);

    // Skip whitespace in front of the keyword.
    pos = query.length - query[pos .. $].stripLeft.length;
    auto rest = query[pos .. $];

    Form form;
    if (rest.startsWith("{and:")) {
        form.op = Op.AND;
        pos += "{and:".length;
    } else if (rest.startsWith("{or:")) {
        form.op = Op.OR;
        pos += "{or:".length;
    } else if (rest.startsWith("{not:")) {
        form.op = Op.NOT;
        pos += "{not:".length;
    } else {
        // Report only the offending word, not the whole remaining input.
        auto wordEnd = rest.indexOf(' ');
        auto word = wordEnd < 0 ? rest : rest[0 .. cast(size_t) wordEnd];
        throw new Exception("Unrecognized keyword: " ~ word);
    }

    while (true) {
        pos = query.length - query[pos .. $].stripLeft.length;
        rest = query[pos .. $];

        if (rest.length == 0) {
            throw new Exception("Malformed query. Missing closing '}'");
        }
        if (rest[0] == '}') {
            // End of this clause; the caller continues right after the brace.
            pos += 1;
            return form;
        }
        if (rest.startsWith("[[")) {
            auto close = rest.indexOf("]]");
            if (close < 0) {
                throw new Exception("Malformed query. Missing closing ']]'");
            }
            // Whitespace directly in front of "]]" is not part of the term.
            terms ~= rest[0 .. cast(size_t) close].strip() ~ "]]";
            Form inner = { Op.ATOM, bitshift: bitshift++ };
            debug bitshift.writeln;
            form.operands ~= inner;
            pos += cast(size_t) close + 2;
        } else {
            // Anything else has to be a nested clause; the recursive call
            // advances `pos` past it, so operands that follow are still seen.
            form.operands ~= parseBooleanClause(query, pos, bitshift, terms);
        }
    }
}
unittest {
    ubyte bitshift = 0;
    auto pq = booleanQuery("{and: [[Hi]] {or: [[Blue]] [[White]] } }", bitshift);
    assert(pq.terms == ["[[Hi]]", "[[Blue]]", "[[White]]"]);
    assert(pq.form.operands[1].op == Op.OR);
    assert(pq.form.operands[1].operands[0].bitshift == 1);
    assert(pq.form.operands[1].operands[1].bitshift == 2);
}
unittest {
    import std.exception : assertThrown;

    // Operands that follow a nested clause must not be dropped.
    ubyte bitshift = 0;
    auto pq = booleanQuery("{and: {or: [[a]] [[b]] } [[c]] }", bitshift);
    assert(pq.terms == ["[[a]]", "[[b]]", "[[c]]"]);
    assert(pq.form.op == Op.AND);
    assert(pq.form.operands.length == 2);
    assert(pq.form.operands[0].op == Op.OR);
    assert(pq.form.operands[1].op == Op.ATOM);
    assert(pq.form.operands[1].bitshift == 2);
    assert(bitshift == 3);

    // Malformed input is reported as an Exception rather than a range error.
    ubyte n = 0;
    assertThrown!Exception(booleanQuery("{and: [[a]]", n));
    assertThrown!Exception(booleanQuery("", n));
    assertThrown!Exception(booleanQuery("{xor: [[a]] }", n));
}
| 116 | + |
/// Escapes `text` for use inside a double-quoted string.
///
/// Every backslash is doubled and every double quote gets a backslash in
/// front of it. Afterwards a doubled backslash directly followed by `n` or
/// `t` is collapsed to a single backslash again, so the two-character
/// sequences backslash-n and backslash-t come out the way they went in.
string escape(string text) {
    // The order of the steps matters: the last two undo part of the first.
    auto escaped = text.replace(`\`, `\\`);
    escaped = escaped.replace(`"`, `\"`);
    escaped = escaped.replace(`\\n`, `\n`);
    escaped = escaped.replace(`\\t`, `\t`);
    return escaped;
}
// A flat clause yields its page references in order of appearance.
unittest {
    import std.stdio;

    ubyte nextBit = 0;
    auto flat = booleanQuery("{and: [[Hi]] [[Hello]] }", nextBit);
    assert(flat.terms == ["[[Hi]]", "[[Hello]]"]);
}
// Minimal handler used by the unittest below: it is constructed from a term
// list and a Form, stores only the terms, and its callbacks do nothing.
private struct WithConstructor {
    string[] member;

    this(string[] arg, Form a) {
        this.member = arg;
    }

    void start(string text) {}

    void end() {}
}
// parse must accept a handler type whose constructor takes (string[], Form).
unittest {
    import parser;

    Form emptyForm;
    parse!WithConstructor("Test", 4, "Title", ["arg"], emptyForm);
}
| 145 | + |
/++
 + Command-line entry point.
 +
 + The first argument is the query, either naked (`{and: ...}`) or wrapped in
 + a `{{query: ...}}` block. Every further argument is a file or a directory;
 + directories are walked recursively. Without further arguments the input is
 + read from stdin. Each input is handed to `parse` together with the parsed
 + query.
 +
 + Returns: 0 on success
 + Throws: Exception when the query argument is missing or malformed, or when
 +         a named file or directory does not exist.
 +/
int main(string[] args) {
    import std.getopt, std.file, std.stdio;
    import std.exception : enforce;
    import std.utf : UTFException;

    // Parse arguments
    enforce(args.length >= 2 && args[1].strip.startsWith("{"),
            "Query must be first parameter");
    string query = args[1];
    args = args[2 .. $];

    // Read query immediately. If the user has written an invalid query, show
    // an error before we read in all files.
    ubyte n = 0;
    ParsedQuery qu = booleanQuery(extractQuery(query), n);

    string[] inputs;
    string[] inputnames;
    if (args.length == 0) {
        // stdin
        string input;
        string line;
        while ((line = readln()) !is null)
            input ~= line;
        inputnames ~= "stdin";
        inputs ~= input;
    }
    foreach (filename; args) {
        // Checked up front: isDir itself throws on a path that does not exist.
        enforce(filename.exists, "No such file or directory: " ~ filename);
        if (filename.isDir) {
            foreach (entry; filename.dirEntries(SpanMode.depth)) {
                if (!entry.isFile)
                    continue;
                // TODO: Proper mechanism to detect and avoid binary files
                try {
                    // TODO: Stream input from files for cache locality, instead of reading everything in at once
                    // Read before recording the name so that a failed read
                    // leaves both arrays untouched.
                    auto text = entry.name.readText;
                    inputnames ~= entry.name;
                    inputs ~= text;
                } catch (UTFException) {
                    // Not valid UTF text: skip this file.
                }
            }
        } else {
            inputnames ~= filename;
            inputs ~= filename.readText;
        }
    }
    foreach (i; 0 .. inputs.length) {
        import parser;
        import std.meta;

        parse!(QueryHandler, doNothing, doNothing, true)(inputs[i], 4, inputnames[i], qu.terms, qu.form);
    }
    return 0;
}
0 commit comments