View Javadoc

1   /*
2    * TokenMarker.java - Generic token marker
3    * Copyright (C) 1998, 1999 Slava Pestov
4    *
5    * You may use and modify this package for any purpose. Redistribution is
6    * permitted, in both source and binary form, provided that this notice
7    * remains intact in all source distributions of this package.
8    */
9   
10  package org.syntax.jedit.tokenmarker;
11  
12  import javax.swing.text.Segment;
13  
14  /***
15   * A token marker that splits lines of text into tokens. Each token carries
16   * a length field and an identification tag that can be mapped to a color
17   * for painting that token.<p>
18   *
19   * For performance reasons, the linked list of tokens is reused after each
20   * line is tokenized. Therefore, the return value of <code>markTokens</code>
21   * should only be used for immediate painting. Notably, it cannot be
22   * cached.
23   *
24   * @author Slava Pestov
25   * @version $Id$
26   *
27   * @see org.syntax.jedit.Token
28   */
29  public abstract class TokenMarker
30  {
31  	/***
32  	 * A wrapper for the lower-level <code>markTokensImpl</code> method
33  	 * that is called to split a line up into tokens.
34  	 * @param line The line
35  	 * @param lineIndex The line number
36  	 */
37  	public Token markTokens(Segment line, int lineIndex)
38  	{
39  		if(lineIndex >= length)
40  		{
41  			throw new IllegalArgumentException("Tokenizing invalid line: "
42  				+ lineIndex);
43  		}
44  
45  		lastToken = null;
46  
47  		LineInfo info = lineInfo[lineIndex];
48  		LineInfo prev;
49  		if(lineIndex == 0)
50  			prev = null;
51  		else
52  			prev = lineInfo[lineIndex - 1];
53  
54  		byte oldToken = info.token;
55  		byte token = markTokensImpl(prev == null ?
56  			Token.NULL : prev.token,line,lineIndex);
57  
58  		info.token = token;
59  
60  		/*
61  		 * This is a foul hack. It stops nextLineRequested
62  		 * from being cleared if the same line is marked twice.
63  		 *
64  		 * Why is this necessary? It's all JEditTextArea's fault.
65  		 * When something is inserted into the text, firing a
66  		 * document event, the insertUpdate() method shifts the
67  		 * caret (if necessary) by the amount inserted.
68  		 *
69  		 * All caret movement is handled by the select() method,
70  		 * which eventually pipes the new position to scrollTo()
71  		 * and calls repaint().
72  		 *
73  		 * Note that at this point in time, the new line hasn't
74  		 * yet been painted; the caret is moved first.
75  		 *
76  		 * scrollTo() calls offsetToX(), which tokenizes the line
77  		 * unless it is being called on the last line painted
78  		 * (in which case it uses the text area's painter cached
79  		 * token list). What scrollTo() does next is irrelevant.
80  		 *
81  		 * After scrollTo() has done it's job, repaint() is
82  		 * called, and eventually we end up in paintLine(), whose
83  		 * job is to paint the changed line. It, too, calls
84  		 * markTokens().
85  		 *
86  		 * The problem was that if the line started a multiline
87  		 * token, the first markTokens() (done in offsetToX())
88  		 * would set nextLineRequested (because the line end
89  		 * token had changed) but the second would clear it
90  		 * (because the line was the same that time) and therefore
91  		 * paintLine() would never know that it needed to repaint
92  		 * subsequent lines.
93  		 *
94  		 * This bug took me ages to track down, that's why I wrote
95  		 * all the relevant info down so that others wouldn't
96  		 * duplicate it.
97  		 */
98  		 if(!(lastLine == lineIndex && nextLineRequested))
99  			nextLineRequested = (oldToken != token);
100 
101 		lastLine = lineIndex;
102 
103 		addToken(0,Token.END);
104 
105 		return firstToken;
106 	}
107 
108 	/***
109 	 * An abstract method that splits a line up into tokens. It
110 	 * should parse the line, and call <code>addToken()</code> to
111 	 * add syntax tokens to the token list. Then, it should return
112 	 * the initial token type for the next line.<p>
113 	 *
114 	 * For example if the current line contains the start of a 
115 	 * multiline comment that doesn't end on that line, this method
116 	 * should return the comment token type so that it continues on
117 	 * the next line.
118 	 *
119 	 * @param token The initial token type for this line
120 	 * @param line The line to be tokenized
121 	 * @param lineIndex The index of the line in the document,
122 	 * starting at 0
123 	 * @return The initial token type for the next line
124 	 */
125 	protected abstract byte markTokensImpl(byte token, Segment line,
126 		int lineIndex);
127 
128 	/***
129 	 * Returns if the token marker supports tokens that span multiple
130 	 * lines. If this is true, the object using this token marker is
131 	 * required to pass all lines in the document to the
132 	 * <code>markTokens()</code> method (in turn).<p>
133 	 *
134 	 * The default implementation returns true; it should be overridden
135 	 * to return false on simpler token markers for increased speed.
136 	 */
137 	public boolean supportsMultilineTokens()
138 	{
139 		return true;
140 	}
141 
142 	/***
143 	 * Informs the token marker that lines have been inserted into
144 	 * the document. This inserts a gap in the <code>lineInfo</code>
145 	 * array.
146 	 * @param index The first line number
147 	 * @param lines The number of lines 
148 	 */
149 	public void insertLines(int index, int lines)
150 	{
151 		if(lines <= 0)
152 			return;
153 		length += lines;
154 		ensureCapacity(length);
155 		int len = index + lines;
156 		System.arraycopy(lineInfo,index,lineInfo,len,
157 			lineInfo.length - len);
158 
159 		for(int i = index + lines - 1; i >= index; i--)
160 		{
161 			lineInfo[i] = new LineInfo();
162 		}
163 	}
164 	
165 	/***
166 	 * Informs the token marker that line have been deleted from
167 	 * the document. This removes the lines in question from the
168 	 * <code>lineInfo</code> array.
169 	 * @param index The first line number
170 	 * @param lines The number of lines
171 	 */
172 	public void deleteLines(int index, int lines)
173 	{
174 		if (lines <= 0)
175 			return;
176 		int len = index + lines;
177 		length -= lines;
178 		System.arraycopy(lineInfo,len,lineInfo,
179 			index,lineInfo.length - len);
180 	}
181 
182 	/***
183 	 * Returns the number of lines in this token marker.
184 	 */
185 	public int getLineCount()
186 	{
187 		return length;
188 	}
189 
190 	/***
191 	 * Returns true if the next line should be repainted. This
192 	 * will return true after a line has been tokenized that starts
193 	 * a multiline token that continues onto the next line.
194 	 */
195 	public boolean isNextLineRequested()
196 	{
197 		return nextLineRequested;
198 	}
199 
200 	// protected members
201 
202 	/***
203 	 * The first token in the list. This should be used as the return
204 	 * value from <code>markTokens()</code>.
205 	 */
206 	protected Token firstToken;
207 
208 	/***
209 	 * The last token in the list. New tokens are added here.
210 	 * This should be set to null before a new line is to be tokenized.
211 	 */
212 	protected Token lastToken;
213 
214 	/***
215 	 * An array for storing information about lines. It is enlarged and
216 	 * shrunk automatically by the <code>insertLines()</code> and
217 	 * <code>deleteLines()</code> methods.
218 	 */
219 	protected LineInfo[] lineInfo;
220 
221 	/***
222 	 * The number of lines in the model being tokenized. This can be
223 	 * less than the length of the <code>lineInfo</code> array.
224 	 */
225 	protected int length;
226 
227 	/***
228 	 * The last tokenized line.
229 	 */
230 	protected int lastLine;
231 
232 	/***
233 	 * True if the next line should be painted.
234 	 */
235 	protected boolean nextLineRequested;
236 
237 	/***
238 	 * Creates a new <code>TokenMarker</code>. This DOES NOT create
239 	 * a lineInfo array; an initial call to <code>insertLines()</code>
240 	 * does that.
241 	 */
242 	protected TokenMarker()
243 	{
244 		lastLine = -1;
245 	}
246 
247 	/***
248 	 * Ensures that the <code>lineInfo</code> array can contain the
249 	 * specified index. This enlarges it if necessary. No action is
250 	 * taken if the array is large enough already.<p>
251 	 *
252 	 * It should be unnecessary to call this under normal
253 	 * circumstances; <code>insertLine()</code> should take care of
254 	 * enlarging the line info array automatically.
255 	 *
256 	 * @param index The array index
257 	 */
258 	protected void ensureCapacity(int index)
259 	{
260 		if(lineInfo == null)
261 			lineInfo = new LineInfo[index + 1];
262 		else if(lineInfo.length <= index)
263 		{
264 			LineInfo[] lineInfoN = new LineInfo[(index + 1) * 2];
265 			System.arraycopy(lineInfo,0,lineInfoN,0,
266 					 lineInfo.length);
267 			lineInfo = lineInfoN;
268 		}
269 	}
270 
271 	/***
272 	 * Adds a token to the token list.
273 	 * @param length The length of the token
274 	 * @param id The id of the token
275 	 */
276 	protected void addToken(int length, byte id)
277 	{
278 		if(id >= Token.INTERNAL_FIRST && id <= Token.INTERNAL_LAST)
279 			throw new InternalError("Invalid id: " + id);
280 
281 		if(length == 0 && id != Token.END)
282 			return;
283 
284 		if(firstToken == null)
285 		{
286 			firstToken = new Token(length,id);
287 			lastToken = firstToken;
288 		}
289 		else if(lastToken == null)
290 		{
291 			lastToken = firstToken;
292 			firstToken.length = length;
293 			firstToken.id = id;
294 		}
295 		else if(lastToken.next == null)
296 		{
297 			lastToken.next = new Token(length,id);
298 			lastToken = lastToken.next;
299 		}
300 		else
301 		{
302 			lastToken = lastToken.next;
303 			lastToken.length = length;
304 			lastToken.id = id;
305 		}
306 	}
307 
308 	/***
309 	 * Inner class for storing information about tokenized lines.
310 	 */
311 	public class LineInfo
312 	{
313 		/***
314 		 * Creates a new LineInfo object with token = Token.NULL
315 		 * and obj = null.
316 		 */
317 		public LineInfo()
318 		{
319 		}
320 
321 		/***
322 		 * Creates a new LineInfo object with the specified
323 		 * parameters.
324 		 */
325 		public LineInfo(byte token, Object obj)
326 		{
327 			this.token = token;
328 			this.obj = obj;
329 		}
330 
331 		/***
332 		 * The id of the last token of the line.
333 		 */
334 		public byte token;
335 
336 		/***
337 		 * This is for use by the token marker implementations
338 		 * themselves. It can be used to store anything that
339 		 * is an object and that needs to exist on a per-line
340 		 * basis.
341 		 */
342 		public Object obj;
343 	}
344 }