1 module workspaced.dparseext;
2 
3 import std.algorithm;
4 import std.array;
5 import std.string;
6 
7 import dparse.ast;
8 import dparse.lexer;
9 import dparse.parser;
10 import dparse.rollback_allocator;
11 import dsymbol.builtin.names;
12 import dsymbol.modulecache : ASTAllocator, ModuleCache;
13 
/// Joins the identifiers of an `IdentifierOrTemplateChain` with dots,
/// e.g. the chain parsed from `foo.bar.baz` becomes `"foo.bar.baz"`.
/// Template instance arguments are not included, only the identifiers.
string makeString(in IdentifierOrTemplateChain c)
{
	auto identifiers = c.identifiersOrTemplateInstances
		.map!(instance => instance.identifier.text);
	return identifiers.join(".");
}
18 
/// Formats any AST node (plus optional extra formatter arguments) back to
/// D source text using dparse's formatter.
/// Returns: `null` if `ast` is null, otherwise the formatted source.
string astToString(T, Args...)(in T ast, Args args)
{
	import dparse.formatter : Formatter;

	if (!ast)
		return null;

	auto output = appender!string();
	auto fmt = new Formatter!(typeof(output))(output);
	fmt.format(ast, args);
	return output.data;
}
31 
/// Formats the parameter list of a function or constructor declaration, or
/// the template parameter list of a template declaration, as D source text.
/// For any other declaration type this returns an empty string.
string paramsToString(Dec)(const Dec dec)
{
	import dparse.formatter : Formatter;

	auto output = appender!string();
	auto fmt = new Formatter!(typeof(output))(output);

	// functions/constructors carry runtime parameters, templates carry
	// template parameters; other Dec types fall through with no output
	static if (is(Dec == FunctionDeclaration) || is(Dec == Constructor))
		fmt.format(dec.parameters);
	else static if (is(Dec == TemplateDeclaration))
		fmt.format(dec.templateParameters);

	return output.data;
}
50 
/// Token kinds whose spelling is not fixed by the grammar: their actual
/// source text lives in `Token.text` rather than being derivable from
/// `Token.type` (see `tokenText` below).
private enum dynamicTokens = [
		"specialTokenSequence", "comment", "identifier", "scriptLine",
		"whitespace", "doubleLiteral", "floatLiteral", "idoubleLiteral",
		"ifloatLiteral", "intLiteral", "longLiteral", "realLiteral",
		"irealLiteral", "uintLiteral", "ulongLiteral", "characterLiteral",
		"dstringLiteral", "stringLiteral", "wstringLiteral"
	];
59 
/// Returns the source text of a token: `token.text` for tokens with dynamic
/// content (identifiers, literals, comments, ...), otherwise the fixed
/// spelling of the token type (e.g. `"if"`, `"+"`, `";"`).
string tokenText(const Token token)
{
	switch (token.type)
	{
		// static foreach emits one `case` label per dynamic token kind;
		// they all fall through to the shared `return token.text;` below
		static foreach (T; dynamicTokens)
		{
	case tok!T:
		}
		return token.text;
	default:
		// fixed-spelling token: `str` maps the type back to its source text
		return str(token.type);
	}
}
73 
/// Returns: the length in bytes of the token's source text.
size_t textLength(const Token token)
{
	return tokenText(token).length;
}
78 
/// Returns: true if the token is a character literal or any string literal.
bool isSomeString(const Token token)
{
	return token.type == tok!"characterLiteral"
		|| token.type == tok!"dstringLiteral"
		|| token.type == tok!"stringLiteral"
		|| token.type == tok!"wstringLiteral";
}
92 
/// Returns: true if the token's source text starts with a character that may
/// appear in an identifier - so keywords count as identifier-like as well.
bool isLikeIdentifier(const Token token)
{
	import workspaced.helpers;

	const text = tokenText(token);
	return text.length > 0 && isIdentifierChar(text[0]);
}
100 
/// Performs a binary search to find the token containing the search location.
/// Params:
///   tokens = the token array to search in.
///   bytes  = the byte index the token should be in.
/// Returns: the index of the token inside the given tokens array which
/// contains the character specified at the given byte. This will be the first
/// token that is `tok.index == bytes` or before the next token that is too far.
/// If no tokens match, this will return `tokens.length`.
///
/// This is equivalent to the following code:
/// ---
/// foreach (i, tok; tokens)
/// {
/// 	if (tok.index == bytes)
/// 		return i;
/// 	else if (tok.index > bytes)
/// 		return i - 1;
/// }
/// return tokens.length;
/// ---
size_t tokenIndexAtByteIndex(scope const(Token)[] tokens, size_t bytes)
out (v; v <= tokens.length)
{
	// empty input, or the search position at/before the very first token:
	// clamp to index 0 (the reference loop would compute `0 - 1` here)
	if (!tokens.length || tokens[0].index >= bytes)
		return 0;

	// find where to start using binary search; note `r = m - 1` may land
	// slightly before the target, which the linear scan below compensates for
	// (assumes tokens are sorted by ascending `index`)
	size_t l = 0;
	size_t r = tokens.length - 1;
	while (l < r)
	{
		size_t m = (l + r) / 2;
		if (tokens[m].index < bytes)
			l = m + 1;
		else
			r = m - 1;
	}
	size_t start = r;

	// search remaining with linear search
	foreach (i, tok; tokens[start .. $])
	{
		if (tok.index == bytes)
			return start + i;
		else if (tok.index > bytes)
			// this token starts past the target, so the previous one contains it
			return start + i - 1;
	}
	// every token starts before the target byte
	return tokens.length;
}
150 
/// ditto
size_t tokenIndexAtPosition(scope const(Token)[] tokens, uint line, uint column)
out (v; v <= tokens.length)
{
	// Orders a token's start against the search position:
	// negative if the token starts before (line, column), zero if exactly
	// at it, positive if after it.
	int cmp(Token token)
	{
		if (token.line != line)
			return token.line < line ? -1 : 1;
		else if (token.column != column)
			return token.column < column ? -1 : 1;
		else
			return 0;
	}

	// empty input, or the search position at/before the first token: clamp to 0
	if (!tokens.length || cmp(tokens[0]) >= 0)
		return 0;

	// find where to start using binary search; the scan below compensates for
	// `r = m - 1` possibly landing slightly before the target
	size_t l = 0;
	size_t r = tokens.length - 1;
	while (l < r)
	{
		size_t m = (l + r) / 2;
		if (cmp(tokens[m]) < 0)
			l = m + 1;
		else
			r = m - 1;
	}
	size_t start = r;

	// search remaining with linear search
	foreach (i, tok; tokens[start .. $])
	{
		// evaluate cmp once per token instead of twice as before
		const c = cmp(tok);
		if (c == 0)
			return start + i;
		else if (c > 0)
			// token starts past the position, so the previous one contains it
			return start + i - 1;
	}
	// every token starts before the search position
	return tokens.length;
}
191 
192 ///
193 unittest
194 {
195 	StringCache stringCache = StringCache(StringCache.defaultBucketCount);
196 	const(Token)[] tokens = getTokensForParser(cast(ubyte[]) `module foo.bar;
197 
198 // ok
199 void main(string[] args)
200 {
201 }
202 
203 /// documentation
204 void foo()
205 {
206 }
207 `, LexerConfig.init, &stringCache);
208 
209 	auto get(size_t bytes)
210 	{
211 		auto i = tokens.tokenIndexAtByteIndex(bytes);
212 		if (i == tokens.length)
213 			return tok!"__EOF__";
214 		return tokens[i].type;
215 	}
216 
217 	assert(get(0) == tok!"module");
218 	assert(get(4) == tok!"module");
219 	assert(get(6) == tok!"module");
220 	assert(get(7) == tok!"identifier");
221 	assert(get(9) == tok!"identifier");
222 	assert(get(10) == tok!".");
223 	assert(get(11) == tok!"identifier");
224 	assert(get(16) == tok!";");
225 	assert(get(49) == tok!"{");
226 	assert(get(48) == tok!"{");
227 	assert(get(47) == tok!")");
228 	assert(get(1000) == tok!"__EOF__");
229 
230 	// TODO: process trivia fields in libdparse >=0.15.0 when it releases
231 	//assert(get(20) == tok!"comment");
232 	assert(get(20) == tok!";");
233 
234 	// assert(get(57) == tok!"comment");
235 }
236 
/// Returns: true if the token type is one of the string literal types.
/// NOTE(review): unlike the `Token` overload, this does not accept
/// `characterLiteral` - confirm whether that difference is intentional.
bool isSomeString(const IdType type)
{
	return type == tok!"stringLiteral"
		|| type == tok!"wstringLiteral"
		|| type == tok!"dstringLiteral";
}
249 
/// Tries to evaluate an expression if it evaluates to a string.
/// Returns: `null` if the resulting value is not a string or could not be
/// evaluated.
string evaluateExpressionString(const PrimaryExpression expr)
in (expr !is null)
{
	// a PrimaryExpression wraps a single token; delegate to the Token overload
	return evaluateExpressionString(expr.primary);
}
258 
/// ditto
string evaluateExpressionString(const UnaryExpression expr)
in (expr !is null)
{
	// only plain primary expressions can currently evaluate to a string
	auto primary = expr.primaryExpression;
	return primary is null ? null : evaluateExpressionString(primary);
}
268 
/// ditto
string evaluateExpressionString(const ExpressionNode expr)
in (expr !is null)
{
	// maybe we want to support simple concatenation here some time

	auto unary = cast(UnaryExpression) expr;
	return unary is null ? null : evaluateExpressionString(unary);
}
280 
/// ditto
string evaluateExpressionString(const Token token)
{
	import dparse.strings : unescapeString;

	switch (token.type)
	{
	case tok!"stringLiteral":
	case tok!"wstringLiteral":
	case tok!"dstringLiteral":
		// raw source text of the literal, including quotes/escapes
		auto str = token.text;

		// we want to unquote here
		// foreach because implicit concatenation can combine multiple strings
		auto ret = appender!string;
		scope StringCache cache = StringCache(16);
		LexerConfig config;
		config.commentBehavior = CommentBehavior.noIntern;
		// keep string tokens as written in source so unescapeString sees them
		config.stringBehavior = StringBehavior.source;
		config.whitespaceBehavior = WhitespaceBehavior.skip;
		config.fileName = "evaluate-string-stdin";
		// re-lex the literal's own source text so each concatenated string
		// part can be unescaped individually
		foreach (t; DLexer(str, config, &cache))
		{
			switch (t.type)
			{
			case tok!"stringLiteral":
			case tok!"wstringLiteral":
			case tok!"dstringLiteral":
				ret ~= unescapeString(t.text);
				break;
			default:
				// unexpected token, return input because it might already be
				// unescaped
				return str;
			}
		}

		return ret.data;
	default:
		// not a string literal token at all
		return null;
	}
}
323 
/// Finds the deepest non-null node of any BaseNode. (like visiting the tree)
/// Aborts on types that contain `DeclarationOrStatement` or `Declaration[]`
/// fields.
/// Useful for getting the IfStatement out of a DeclarationOrStatement without
/// traversing its children.
BaseNode findDeepestNonBlockNode(T : BaseNode)(T ast)
{
	static assert(!is(T == BaseNode), "Passed in a BaseNode, that's probably not what you wanted to do (pass in the most specific type you have)");
	// first pass (compile-time over the fields): if this node type contains a
	// statement/declaration block field, stop descending here
	bool nonProcess = false;
	foreach (member; ast.tupleof)
	{
		static if (is(typeof(member) : DeclarationOrStatement)
			|| is(typeof(member) : Declaration[]))
		{
			nonProcess = true;
		}
	}

	if (nonProcess)
		return ast;

	// second pass: recurse into the first non-null child node, if any
	foreach (member; ast.tupleof)
	{
		static if (is(typeof(member) : BaseNode))
		{
			if (member !is null)
			{
				return findDeepestNonBlockNode(member);
			}
		}
	}
	// no child node set - this is already the deepest node
	return ast;
}
357 
/// Gets the final `else` block of an if. Will return a node of type
/// `IfStatement` if it's an `else if` block. Returns null if there is no single
/// else statement.
BaseNode getIfElse(IfStatement ifStmt)
{
	if (!ifStmt.elseStatement)
		return null;

	while (true)
	{
		auto elseStmt = ifStmt.elseStatement;
		if (!elseStmt)
			// reached the trailing `if` of an `else if` chain with no `else`
			return ifStmt;

		// unwrap the statement wrappers around the else body
		auto stmtInElse = elseStmt.findDeepestNonBlockNode;
		assert(stmtInElse !is elseStmt);

		// cast once and reuse the result (previously cast twice)
		auto elseIf = cast(IfStatement) stmtInElse;
		if (elseIf)
			ifStmt = elseIf; // `else if`: keep following the chain
		else
			return stmtInElse; // plain `else` body
	}
}
381 
unittest
{
	StringCache stringCache = StringCache(StringCache.defaultBucketCount);
	RollbackAllocator rba;
	// helper: lex and parse a single if statement from the given source
	IfStatement parseIfStmt(string code)
	{
		const(Token)[] tokens = getTokensForParser(cast(ubyte[])code, LexerConfig.init, &stringCache);
		auto parser = new Parser();
		parser.tokens = tokens;
		parser.allocator = &rba;
		return parser.parseIfStatement();
	}

	alias p = parseIfStmt;
	// no else at all -> null
	assert(getIfElse(p("if (x) {}")) is null);
	// `else if` with no trailing else -> the inner IfStatement
	assert(getIfElse(p("if (x) {} else if (y) {}")) !is null);
	assert(cast(IfStatement)getIfElse(p("if (x) {} else if (y) {}")) !is null, typeid(getIfElse(p("if (x) {} else if (y) {}"))).name);
	// trailing plain `else` -> its body, which is not an IfStatement
	assert(getIfElse(p("if (x) {} else if (y) {} else {}")) !is null);
	assert(cast(IfStatement)getIfElse(p("if (x) {} else if (y) {} else {}")) is null);
}