module workspaced.dparseext;

import std.algorithm;
import std.array;
import std.experimental.logger;
import std.string;

import dparse.ast;
import dparse.lexer;
import dparse.parser;
import dparse.rollback_allocator;
import dsymbol.builtin.names;
import dsymbol.modulecache : ASTAllocator, ModuleCache;

/// Joins the identifier segments of a dot-separated chain (e.g. `foo.bar.Baz`)
/// into a single `.`-separated string using each segment's identifier text.
string makeString(in IdentifierOrTemplateChain c)
{
	return c.identifiersOrTemplateInstances.map!(a => a.identifier.text).join(".");
}

/// Formats any libdparse AST node back to D source text.
/// Params:
///   ast = the node to format; `null` yields `null`.
///   args = extra arguments forwarded to `Formatter.format`.
/// Returns: the formatted source string, or `null` if `ast` is `null`.
string astToString(T, Args...)(in T ast, Args args)
{
	import dparse.formatter : Formatter;

	if (!ast)
		return null;

	auto app = appender!string();
	auto formatter = new Formatter!(typeof(app))(app);
	formatter.format(ast, args);
	return app.data;
}

/// Formats the parameter list of a declaration to source text: the runtime
/// parameters for functions/constructors, the template parameters for
/// template declarations. Any other `Dec` type matches no `static if`
/// branch and yields an empty string.
string paramsToString(Dec)(const Dec dec)
{
	import dparse.formatter : Formatter;

	auto app = appender!string();
	auto formatter = new Formatter!(typeof(app))(app);

	static if (is(Dec == FunctionDeclaration) || is(Dec == Constructor))
	{
		formatter.format(dec.parameters);
	}
	else static if (is(Dec == TemplateDeclaration))
	{
		formatter.format(dec.templateParameters);
	}

	return app.data;
}

/// Token types whose source text is carried in the token's `text` field
/// (identifiers, literals, comments, ...) rather than being implied by the
/// token type alone.
private enum dynamicTokens = [
	"specialTokenSequence", "comment", "identifier", "scriptLine",
	"whitespace", "doubleLiteral", "floatLiteral", "idoubleLiteral",
	"ifloatLiteral", "intLiteral", "longLiteral", "realLiteral",
	"irealLiteral", "uintLiteral", "ulongLiteral", "characterLiteral",
	"dstringLiteral", "stringLiteral", "wstringLiteral"
];

/// Returns the source text of a token: `token.text` for dynamic tokens
/// (see `dynamicTokens`), otherwise the fixed spelling of the token type
/// (e.g. `"+"`, `"if"`) via `str`.
string tokenText(const Token token)
{
	switch (token.type)
	{
		// static foreach emits one `case` label per dynamic token type;
		// all labels fall through to the shared `return token.text;` below.
		static foreach (T; dynamicTokens)
		{
	case tok!T:
		}
		return token.text;
	default:
		return str(token.type);
	}
}

/// Returns the length in bytes of the token's source text.
size_t textLength(const Token token)
{
	return token.tokenText.length;
}

/// Returns true if the token is a character or string literal.
/// NOTE(review): unlike the `IdType` overload further below, this also
/// accepts `characterLiteral` — confirm the difference is intentional.
bool isSomeString(const Token token)
{
	switch (token.type)
	{
	case tok!"characterLiteral":
	case tok!"dstringLiteral":
	case tok!"stringLiteral":
	case tok!"wstringLiteral":
		return true;
	default:
		return false;
	}
}

/// Returns true if the token's text starts with an identifier character,
/// so identifiers and keywords alike count as "identifier-like".
bool isLikeIdentifier(const Token token)
{
	import workspaced.helpers;

	auto text = token.tokenText;
	return text.length && text[0].isIdentifierChar;
}

/// Performs a binary search to find the token containing the search location.
/// Params:
///   tokens = the token array to search in.
///   bytes = the byte index the token should be in.
/// Returns: the index of the token inside the given tokens array which
/// contains the character specified at the given byte. This will be the first
/// token that is `tok.index == bytes` or before the next token that is too far.
/// If no tokens match, this will return `tokens.length`.
///
/// This is equivalent to the following code:
/// ---
/// foreach (i, tok; tokens)
/// {
///     if (tok.index == bytes)
///         return i;
///     else if (tok.index > bytes)
///         return i - 1;
/// }
/// return tokens.length;
/// ---
size_t tokenIndexAtByteIndex(scope const(Token)[] tokens, size_t bytes)
out (v; v <= tokens.length)
{
	// guard: empty input, or the first token already starts at/after `bytes`.
	// This also ensures tokens[0].index < bytes inside the loop below, so
	// `m` is never decremented when it is 0 (no size_t underflow).
	if (!tokens.length || tokens[0].index >= bytes)
		return 0;

	// find where to start using binary search
	size_t l = 0;
	size_t r = tokens.length - 1;
	while (l < r)
	{
		size_t m = (l + r) / 2;
		if (tokens[m].index < bytes)
			l = m + 1;
		else
			r = m - 1; // may stop just below the match; the scan below recovers
	}
	size_t start = r;

	// search remaining with linear search
	foreach (i, tok; tokens[start .. $])
	{
		if (tok.index == bytes)
			return start + i;
		else if (tok.index > bytes)
			return start + i - 1;
	}
	return tokens.length;
}

/// ditto
size_t tokenIndexAtPosition(scope const(Token)[] tokens, uint line, uint column)
out (v; v <= tokens.length)
{
	// three-way comparison of a token's start against the (line, column)
	// search position: negative = before, 0 = exact match, positive = after.
	int cmp(Token token)
	{
		if (token.line != line)
			return token.line < line ? -1 : 1;
		else if (token.column != column)
			return token.column < column ? -1 : 1;
		else
			return 0;
	}

	// guard: empty input, or the first token already starts at/after the
	// position (also keeps `m - 1` below from underflowing at m == 0).
	if (!tokens.length || cmp(tokens[0]) >= 0)
		return 0;

	// find where to start using binary search
	size_t l = 0;
	size_t r = tokens.length - 1;
	while (l < r)
	{
		size_t m = (l + r) / 2;
		if (cmp(tokens[m]) < 0)
			l = m + 1;
		else
			r = m - 1; // may stop just below the match; the scan below recovers
	}
	size_t start = r;

	// search remaining with linear search
	foreach (i, tok; tokens[start .. $])
	{
		if (cmp(tok) == 0)
			return start + i;
		else if (cmp(tok) > 0)
			return start + i - 1;
	}
	return tokens.length;
}

///
unittest
{
	StringCache stringCache = StringCache(StringCache.defaultBucketCount);
	const(Token)[] tokens = getTokensForParser(cast(ubyte[]) `module foo.bar;

// ok
void main(string[] args)
{
}

/// documentation
void foo()
{
}
`, LexerConfig.init, &stringCache);

	auto get(size_t bytes)
	{
		auto i = tokens.tokenIndexAtByteIndex(bytes);
		if (i == tokens.length)
			return tok!"__EOF__";
		return tokens[i].type;
	}

	assert(get(0) == tok!"module");
	assert(get(4) == tok!"module");
	assert(get(6) == tok!"module");
	assert(get(7) == tok!"identifier");
	assert(get(9) == tok!"identifier");
	assert(get(10) == tok!".");
	assert(get(11) == tok!"identifier");
	assert(get(16) == tok!";");
	assert(get(49) == tok!"{");
	assert(get(48) == tok!"{");
	assert(get(47) == tok!")");
	assert(get(1000) == tok!"__EOF__");

	// TODO: process trivia fields in libdparse >=0.15.0 when it releases
	//assert(get(20) == tok!"comment");
	assert(get(20) == tok!";");

	// assert(get(57) == tok!"comment");
}

/// Returns true if the given token type is a string literal (string,
/// wstring or dstring).
/// NOTE(review): excludes `characterLiteral`, unlike the Token overload
/// above — confirm whether that is intentional.
bool isSomeString(const IdType type)
{
	switch (type)
	{
	case tok!"stringLiteral":
	case tok!"wstringLiteral":
	case tok!"dstringLiteral":
		return true;
	default:
		return false;
	}
}

/// Tries to evaluate an expression if it evaluates to a string.
/// Returns: `null` if the resulting value is not a string or could not be
/// evaluated.
string evaluateExpressionString(const PrimaryExpression expr)
in (expr !is null)
{
	return evaluateExpressionString(expr.primary);
}

/// ditto
string evaluateExpressionString(const UnaryExpression expr)
in (expr !is null)
{
	if (expr.primaryExpression)
		return evaluateExpressionString(expr.primaryExpression);
	else
		return null;
}

/// ditto
string evaluateExpressionString(const ExpressionNode expr)
in (expr !is null)
{
	// maybe we want to support simple concatenation here some time

	if (auto unary = cast(UnaryExpression) expr)
		return evaluateExpressionString(unary);
	else
		return null;
}

/// ditto
string evaluateExpressionString(const Token token)
{
	import dparse.strings : unescapeString, isStringLiteral;

	switch (token.type)
	{
	case tok!"stringLiteral":
	case tok!"wstringLiteral":
	case tok!"dstringLiteral":
		auto str = token.text;

		// we want to unquote here
		// foreach because implicit concatenation can combine multiple strings
		auto ret = appender!string;
		scope StringCache cache = StringCache(16);
		LexerConfig config;
		config.commentBehavior = CommentBehavior.noIntern;
		config.stringBehavior = StringBehavior.source;
		config.whitespaceBehavior = WhitespaceBehavior.skip;
		config.fileName = "evaluate-string-stdin";
		foreach (t; DLexer(str, config, &cache))
		{
			switch (t.type)
			{
			case tok!"stringLiteral":
			case tok!"wstringLiteral":
			case tok!"dstringLiteral":
				if (t.text.isStringLiteral)
				{
					ret ~= unescapeString(t.text);
				}
				else
				{
					// debug builds fail loudly so a lexer/unescape mismatch
					// gets noticed; release builds log a warning and fall
					// back to returning the raw input.
					debug
					{
						throw new Exception("Invalid stringLiteral in stringLiteral token: `" ~ t.text ~ '`');
					}
					else
					{
						warningf("Invalid stringLiteral in stringLiteral token: `%s`", t.text);
						return str;
					}
				}
				break;
			default:
				// unexpected token, return input because it might already be
				// unescaped
				return str;
			}
		}

		return ret.data;
	default:
		return null;
	}
}

/// Finds the deepest non-null node of any BaseNode. (like visiting the tree)
/// Aborts on types that contain `DeclarationOrStatement` or `Declaration[]`
/// fields.
/// Useful for getting the IfStatement out of a DeclarationOrStatement without
/// traversing its children.
BaseNode findDeepestNonBlockNode(T : BaseNode)(T ast)
{
	static assert(!is(T == BaseNode), "Passed in a BaseNode, that's probably not what you wanted to do (pass in the most specific type you have)");
	// first pass: refuse to descend into block-like nodes (those owning
	// statement/declaration lists), otherwise we would walk into bodies.
	bool nonProcess = false;
	foreach (member; ast.tupleof)
	{
		static if (is(typeof(member) : DeclarationOrStatement)
			|| is(typeof(member) : Declaration[]))
		{
			nonProcess = true;
		}
	}

	if (nonProcess)
		return ast;

	// second pass: recurse into the first non-null child node.
	foreach (member; ast.tupleof)
	{
		static if (is(typeof(member) : BaseNode))
		{
			if (member !is null)
			{
				return findDeepestNonBlockNode(member);
			}
		}
	}
	return ast;
}

/// Gets the final `else` block of an if. Will return a node of type
/// `IfStatement` if it's an `else if` block. Returns null if there is no single
/// else statement.
BaseNode getIfElse(IfStatement ifStmt)
{
	// no else at all on the outermost if
	if (!ifStmt.elseStatement)
		return null;

	// walk down the `else if` chain until an else carries something other
	// than another IfStatement (a plain else body), or the chain ends.
	while (true)
	{
		auto elseStmt = ifStmt.elseStatement;
		if (!elseStmt)
			return ifStmt; // trailing `else if` without its own else

		auto stmtInElse = elseStmt.findDeepestNonBlockNode;
		assert(stmtInElse !is elseStmt);

		// bind the cast result once instead of casting twice
		if (auto nested = cast(IfStatement) stmtInElse)
			ifStmt = nested;
		else
			return stmtInElse;
	}
}

unittest
{
	StringCache stringCache = StringCache(StringCache.defaultBucketCount);
	RollbackAllocator rba;
	IfStatement parseIfStmt(string code)
	{
		const(Token)[] tokens = getTokensForParser(cast(ubyte[]) code, LexerConfig.init, &stringCache);
		auto parser = new Parser();
		parser.tokens = tokens;
		parser.allocator = &rba;
		return parser.parseIfStatement();
	}

	alias p = parseIfStmt;
	assert(getIfElse(p("if (x) {}")) is null);
	assert(getIfElse(p("if (x) {} else if (y) {}")) !is null);
	assert(cast(IfStatement)getIfElse(p("if (x) {} else if (y) {}")) !is null, typeid(getIfElse(p("if (x) {} else if (y) {}"))).name);
	assert(getIfElse(p("if (x) {} else if (y) {} else {}")) !is null);
	assert(cast(IfStatement)getIfElse(p("if (x) {} else if (y) {} else {}")) is null);
}

/// Slices `s` by a `[start, end]` pair. See the two-argument overload for
/// the clamping rules.
C[] substr(C)(C[] s, size_t[2] range)
{
	return substr(s, range[0], range[1]);
}

/// Returns `s[start .. end]` with both bounds clamped to valid positions
/// instead of throwing a RangeError:
/// $(UL
///   $(LI empty input is returned as-is)
///   $(LI `start >= s.length` is clamped to the last valid index, so an
///        out-of-range start still yields the final character)
///   $(LI `end > s.length` is clamped to `s.length`)
///   $(LI `end < start` yields an empty slice at `start`))
C[] substr(C)(C[] s, size_t start, size_t end)
{
	if (!s.length)
		return s;
	// note: `start` is size_t (unsigned), so no lower-bound clamp is needed;
	// the previous `if (start < 0)` check was dead code and has been removed.
	if (start >= s.length)
		start = s.length - 1; // @suppress(dscanner.suspicious.length_subtraction)
	if (end > s.length)
		end = s.length;
	if (end < start)
		return s[start .. start];
	return s[start .. end];
}