module workspaced.dparseext;

import std.algorithm;
import std.array;
import std.experimental.logger;
import std.string;

import dparse.ast;
import dparse.lexer;
import dparse.parser;
import dparse.rollback_allocator;
import dsymbol.builtin.names;
import dsymbol.modulecache : ASTAllocator, ModuleCache;

/// Joins the identifier segments of a dot-separated chain (e.g. `foo.bar.Baz`)
/// into a single `.`-separated string using each segment's identifier text.
string makeString(in IdentifierOrTemplateChain c)
{
	return c.identifiersOrTemplateInstances.map!(a => a.identifier.text).join(".");
}

/// Formats any libdparse AST node back to D source text.
/// Params:
///   ast = the node to format; `null` yields `null`.
///   args = extra arguments forwarded to `Formatter.format`.
/// Returns: the formatted source string, or `null` if `ast` is `null`.
string astToString(T, Args...)(in T ast, Args args)
{
	import dparse.formatter : Formatter;

	if (!ast)
		return null;

	auto app = appender!string();
	auto formatter = new Formatter!(typeof(app))(app);
	formatter.format(ast, args);
	return app.data;
}

/// Formats the parameter list of a declaration to source text: the runtime
/// parameters for functions/constructors, the template parameters for
/// template declarations. Any other `Dec` type matches no `static if`
/// branch and yields an empty string.
string paramsToString(Dec)(const Dec dec)
{
	import dparse.formatter : Formatter;

	auto app = appender!string();
	auto formatter = new Formatter!(typeof(app))(app);

	static if (is(Dec == FunctionDeclaration) || is(Dec == Constructor))
	{
		formatter.format(dec.parameters);
	}
	else static if (is(Dec == TemplateDeclaration))
	{
		formatter.format(dec.templateParameters);
	}

	return app.data;
}

/// Token types whose source text is carried in the token's `text` field
/// (identifiers, literals, comments, ...) rather than being implied by the
/// token type alone.
private enum dynamicTokens = [
	"specialTokenSequence", "comment", "identifier", "scriptLine",
	"whitespace", "doubleLiteral", "floatLiteral", "idoubleLiteral",
	"ifloatLiteral", "intLiteral", "longLiteral", "realLiteral",
	"irealLiteral", "uintLiteral", "ulongLiteral", "characterLiteral",
	"dstringLiteral", "stringLiteral", "wstringLiteral"
];

/// Returns the source text of a token: `token.text` for dynamic tokens
/// (see `dynamicTokens`), otherwise the fixed spelling of the token type
/// (e.g. `"+"`, `"if"`) via `str`.
string tokenText(const Token token)
{
	switch (token.type)
	{
		// static foreach emits one `case` label per dynamic token type;
		// all labels fall through to the shared `return token.text;` below.
		static foreach (T; dynamicTokens)
		{
	case tok!T:
		}
		return token.text;
	default:
		return str(token.type);
	}
}

/// Returns the length in bytes of the token's source text.
size_t textLength(const Token token)
{
	return token.tokenText.length;
}

/// Returns true if the token is a character or string literal.
/// NOTE(review): unlike the `IdType` overload further below, this also
/// accepts `characterLiteral` — confirm the difference is intentional.
bool isSomeString(const Token token)
{
	switch (token.type)
	{
	case tok!"characterLiteral":
	case tok!"dstringLiteral":
	case tok!"stringLiteral":
	case tok!"wstringLiteral":
		return true;
	default:
		return false;
	}
}

/// Returns true if the token's text starts with an identifier character,
/// so identifiers and keywords alike count as "identifier-like".
bool isLikeIdentifier(const Token token)
{
	import workspaced.helpers;

	auto text = token.tokenText;
	return text.length && text[0].isIdentifierChar;
}

/// Performs a binary search to find the token containing the search location.
/// Params:
///   tokens = the token array to search in.
///   bytes = the byte index the token should be in.
/// Returns: the index of the token inside the given tokens array which
/// contains the character specified at the given byte. This will be the first
/// token that is `tok.index == bytes` or before the next token that is too far.
/// If no tokens match, this will return `tokens.length`.
///
/// This is equivalent to the following code:
/// ---
/// foreach (i, tok; tokens)
/// {
///     if (tok.index == bytes)
///         return i;
///     else if (tok.index > bytes)
///         return i - 1;
/// }
/// return tokens.length;
/// ---
size_t tokenIndexAtByteIndex(scope const(Token)[] tokens, size_t bytes)
out (v; v <= tokens.length)
{
	// guard: empty input, or the first token already starts at/after `bytes`.
	// This also ensures tokens[0].index < bytes inside the loop below, so
	// `m` is never decremented when it is 0 (no size_t underflow).
	if (!tokens.length || tokens[0].index >= bytes)
		return 0;

	// find where to start using binary search
	size_t l = 0;
	size_t r = tokens.length - 1;
	while (l < r)
	{
		size_t m = (l + r) / 2;
		if (tokens[m].index < bytes)
			l = m + 1;
		else
			r = m - 1; // may stop just below the match; the scan below recovers
	}
	size_t start = r;

	// search remaining with linear search
	foreach (i, tok; tokens[start .. $])
	{
		if (tok.index == bytes)
			return start + i;
		else if (tok.index > bytes)
			return start + i - 1;
	}
	return tokens.length;
}

/// ditto
size_t tokenIndexAtPosition(scope const(Token)[] tokens, uint line, uint column)
out (v; v <= tokens.length)
{
	// three-way comparison of a token's start against the (line, column)
	// search position: negative = before, 0 = exact match, positive = after.
	int cmp(Token token)
	{
		if (token.line != line)
			return token.line < line ? -1 : 1;
		else if (token.column != column)
			return token.column < column ? -1 : 1;
		else
			return 0;
	}

	// guard: empty input, or the first token already starts at/after the
	// position (also keeps `m - 1` below from underflowing at m == 0).
	if (!tokens.length || cmp(tokens[0]) >= 0)
		return 0;

	// find where to start using binary search
	size_t l = 0;
	size_t r = tokens.length - 1;
	while (l < r)
	{
		size_t m = (l + r) / 2;
		if (cmp(tokens[m]) < 0)
			l = m + 1;
		else
			r = m - 1; // may stop just below the match; the scan below recovers
	}
	size_t start = r;

	// search remaining with linear search
	foreach (i, tok; tokens[start .. $])
	{
		if (cmp(tok) == 0)
			return start + i;
		else if (cmp(tok) > 0)
			return start + i - 1;
	}
	return tokens.length;
}

///
unittest
{
	StringCache stringCache = StringCache(StringCache.defaultBucketCount);
	const(Token)[] tokens = getTokensForParser(cast(ubyte[]) `module foo.bar;

// ok
void main(string[] args)
{
}

/// documentation
void foo()
{
}
`, LexerConfig.init, &stringCache);

	auto get(size_t bytes)
	{
		auto i = tokens.tokenIndexAtByteIndex(bytes);
		if (i == tokens.length)
			return tok!"__EOF__";
		return tokens[i].type;
	}

	assert(get(0) == tok!"module");
	assert(get(4) == tok!"module");
	assert(get(6) == tok!"module");
	assert(get(7) == tok!"identifier");
	assert(get(9) == tok!"identifier");
	assert(get(10) == tok!".");
	assert(get(11) == tok!"identifier");
	assert(get(16) == tok!";");
	assert(get(49) == tok!"{");
	assert(get(48) == tok!"{");
	assert(get(47) == tok!")");
	assert(get(1000) == tok!"__EOF__");

	// TODO: process trivia fields in libdparse >=0.15.0 when it releases
	//assert(get(20) == tok!"comment");
	assert(get(20) == tok!";");

	// assert(get(57) == tok!"comment");
}

/// Returns true if the given token type is a string literal (string,
/// wstring or dstring).
/// NOTE(review): excludes `characterLiteral`, unlike the Token overload
/// above — confirm whether that is intentional.
bool isSomeString(const IdType type)
{
	switch (type)
	{
	case tok!"stringLiteral":
	case tok!"wstringLiteral":
	case tok!"dstringLiteral":
		return true;
	default:
		return false;
	}
}

/// Tries to evaluate an expression if it evaluates to a string.
/// Returns: `null` if the resulting value is not a string or could not be
/// evaluated.
string evaluateExpressionString(const PrimaryExpression expr)
in (expr !is null)
{
	return evaluateExpressionString(expr.primary);
}

/// ditto
string evaluateExpressionString(const UnaryExpression expr)
in (expr !is null)
{
	if (expr.primaryExpression)
		return evaluateExpressionString(expr.primaryExpression);
	else
		return null;
}

/// ditto
string evaluateExpressionString(const ExpressionNode expr)
in (expr !is null)
{
	// maybe we want to support simple concatenation here some time

	if (auto unary = cast(UnaryExpression) expr)
		return evaluateExpressionString(unary);
	else
		return null;
}

/// ditto
string evaluateExpressionString(const Token token)
{
	import dparse.strings : unescapeString, isStringLiteral;

	switch (token.type)
	{
	case tok!"stringLiteral":
	case tok!"wstringLiteral":
	case tok!"dstringLiteral":
		auto str = token.text;

		// we want to unquote here
		// foreach because implicit concatenation can combine multiple strings
		auto ret = appender!string;
		scope StringCache cache = StringCache(16);
		LexerConfig config;
		config.commentBehavior = CommentBehavior.noIntern;
		config.stringBehavior = StringBehavior.source;
		config.whitespaceBehavior = WhitespaceBehavior.skip;
		config.fileName = "evaluate-string-stdin";
		foreach (t; DLexer(str, config, &cache))
		{
			switch (t.type)
			{
			case tok!"stringLiteral":
			case tok!"wstringLiteral":
			case tok!"dstringLiteral":
				if (t.text.isStringLiteral)
				{
					ret ~= unescapeString(t.text);
				}
				else
				{
					// debug builds fail loudly so a lexer/unescape mismatch
					// gets noticed; release builds log a warning and fall
					// back to returning the raw input.
					debug
					{
						throw new Exception("Invalid stringLiteral in stringLiteral token: `" ~ t.text ~ '`');
					}
					else
					{
						warningf("Invalid stringLiteral in stringLiteral token: `%s`", t.text);
						return str;
					}
				}
				break;
			default:
				// unexpected token, return input because it might already be
				// unescaped
				return str;
			}
		}

		return ret.data;
	default:
		return null;
	}
}

/// Finds the deepest non-null node of any BaseNode. (like visiting the tree)
/// Aborts on types that contain `DeclarationOrStatement` or `Declaration[]`
/// fields.
/// Useful for getting the IfStatement out of a DeclarationOrStatement without
/// traversing its children.
BaseNode findDeepestNonBlockNode(T : BaseNode)(T ast)
{
	static assert(!is(T == BaseNode), "Passed in a BaseNode, that's probably not what you wanted to do (pass in the most specific type you have)");
	// first pass: refuse to descend into block-like nodes (those owning
	// statement/declaration lists), otherwise we would walk into bodies.
	bool nonProcess = false;
	foreach (member; ast.tupleof)
	{
		static if (is(typeof(member) : DeclarationOrStatement)
			|| is(typeof(member) : Declaration[]))
		{
			nonProcess = true;
		}
	}

	if (nonProcess)
		return ast;

	// second pass: recurse into the first non-null child node.
	foreach (member; ast.tupleof)
	{
		static if (is(typeof(member) : BaseNode))
		{
			if (member !is null)
			{
				return findDeepestNonBlockNode(member);
			}
		}
	}
	return ast;
}

/// Gets the final `else` block of an if. Will return a node of type
/// `IfStatement` if it's an `else if` block. Returns null if there is no single
/// else statement.
BaseNode getIfElse(IfStatement ifStmt)
{
	// no else at all on the outermost if
	if (!ifStmt.elseStatement)
		return null;

	// walk down the `else if` chain until an else carries something other
	// than another IfStatement (a plain else body), or the chain ends.
	while (true)
	{
		auto elseStmt = ifStmt.elseStatement;
		if (!elseStmt)
			return ifStmt; // trailing `else if` without its own else

		auto stmtInElse = elseStmt.findDeepestNonBlockNode;
		assert(stmtInElse !is elseStmt);

		// bind the cast result once instead of casting twice
		if (auto nested = cast(IfStatement) stmtInElse)
			ifStmt = nested;
		else
			return stmtInElse;
	}
}

unittest
{
	StringCache stringCache = StringCache(StringCache.defaultBucketCount);
	RollbackAllocator rba;
	IfStatement parseIfStmt(string code)
	{
		const(Token)[] tokens = getTokensForParser(cast(ubyte[]) code, LexerConfig.init, &stringCache);
		auto parser = new Parser();
		parser.tokens = tokens;
		parser.allocator = &rba;
		return parser.parseIfStatement();
	}

	alias p = parseIfStmt;
	assert(getIfElse(p("if (x) {}")) is null);
	assert(getIfElse(p("if (x) {} else if (y) {}")) !is null);
	assert(cast(IfStatement)getIfElse(p("if (x) {} else if (y) {}")) !is null, typeid(getIfElse(p("if (x) {} else if (y) {}"))).name);
	assert(getIfElse(p("if (x) {} else if (y) {} else {}")) !is null);
	assert(cast(IfStatement)getIfElse(p("if (x) {} else if (y) {} else {}")) is null);
}

/// Slices `s` by a `[start, end]` pair. See the two-argument overload for
/// the clamping rules.
C[] substr(C)(C[] s, size_t[2] range)
{
	return substr(s, range[0], range[1]);
}

/// Returns `s[start .. end]` with both bounds clamped to valid positions
/// instead of throwing a RangeError:
/// $(UL
///   $(LI empty input is returned as-is)
///   $(LI `start >= s.length` is clamped to the last valid index, so an
///        out-of-range start still yields the final character)
///   $(LI `end > s.length` is clamped to `s.length`)
///   $(LI `end < start` yields an empty slice at `start`))
C[] substr(C)(C[] s, size_t start, size_t end)
{
	if (!s.length)
		return s;
	// note: `start` is size_t (unsigned), so no lower-bound clamp is needed;
	// the previous `if (start < 0)` check was dead code and has been removed.
	if (start >= s.length)
		start = s.length - 1; // @suppress(dscanner.suspicious.length_subtraction)
	if (end > s.length)
		end = s.length;
	if (end < start)
		return s[start .. start];
	return s[start .. end];
}