View Javadoc

1   /*
2    * TokenMarker.java - Generic token marker
3    * Copyright (C) 1998, 1999 Slava Pestov
4    *
5    * You may use and modify this package for any purpose. Redistribution is
6    * permitted, in both source and binary form, provided that this notice
7    * remains intact in all source distributions of this package.
8    */
9   
10  package org.syntax.jedit.tokenmarker;
11  
12  import javax.swing.text.Segment;
13  
14  /***
15   * A token marker that splits lines of text into tokens. Each token carries a
16   * length field and an indentification tag that can be mapped to a color for
17   * painting that token.
18   * <p>
19   * 
20   * For performance reasons, the linked list of tokens is reused after each line
21   * is tokenized. Therefore, the return value of <code>markTokens</code> should
22   * only be used for immediate painting. Notably, it cannot be cached.
23   * 
24   * @author Slava Pestov
25   * @version $Id$
26   * 
27   * @see org.syntax.jedit.Token
28   */
29  public abstract class TokenMarker
30  {
31  	/***
32  	 * A wrapper for the lower-level <code>markTokensImpl</code> method that is
33  	 * called to split a line up into tokens.
34  	 * 
35  	 * @param line
36  	 *           The line
37  	 * @param lineIndex
38  	 *           The line number
39  	 */
40  	public Token markTokens( Segment line, int lineIndex )
41  	{
42  		if( lineIndex >= length )
43  		{
44  			throw new IllegalArgumentException( "Tokenizing invalid line: " + lineIndex );
45  		}
46  
47  		lastToken = null;
48  
49  		LineInfo info = lineInfo[lineIndex];
50  		LineInfo prev;
51  		if( lineIndex == 0 )
52  			prev = null;
53  		else
54  			prev = lineInfo[lineIndex - 1];
55  
56  		byte oldToken = info.token;
57  		byte token = markTokensImpl( prev == null ? Token.NULL : prev.token, line, lineIndex );
58  
59  		info.token = token;
60  
61  		/*
62  		 * This is a foul hack. It stops nextLineRequested from being cleared if
63  		 * the same line is marked twice.
64  		 * 
65  		 * Why is this necessary? It's all JEditTextArea's fault. When something
66  		 * is inserted into the text, firing a document event, the insertUpdate()
67  		 * method shifts the caret (if necessary) by the amount inserted.
68  		 * 
69  		 * All caret movement is handled by the select() method, which eventually
70  		 * pipes the new position to scrollTo() and calls repaint().
71  		 * 
72  		 * Note that at this point in time, the new line hasn't yet been painted;
73  		 * the caret is moved first.
74  		 * 
75  		 * scrollTo() calls offsetToX(), which tokenizes the line unless it is
76  		 * being called on the last line painted (in which case it uses the text
77  		 * area's painter cached token list). What scrollTo() does next is
78  		 * irrelevant.
79  		 * 
80  		 * After scrollTo() has done it's job, repaint() is called, and eventually
81  		 * we end up in paintLine(), whose job is to paint the changed line. It,
82  		 * too, calls markTokens().
83  		 * 
84  		 * The problem was that if the line started a multiline token, the first
85  		 * markTokens() (done in offsetToX()) would set nextLineRequested (because
86  		 * the line end token had changed) but the second would clear it (because
87  		 * the line was the same that time) and therefore paintLine() would never
88  		 * know that it needed to repaint subsequent lines.
89  		 * 
90  		 * This bug took me ages to track down, that's why I wrote all the
91  		 * relevant info down so that others wouldn't duplicate it.
92  		 */
93  		if( !( lastLine == lineIndex && nextLineRequested ) )
94  			nextLineRequested = ( oldToken != token );
95  
96  		lastLine = lineIndex;
97  
98  		addToken( 0, Token.END );
99  
100 		return firstToken;
101 	}
102 
103 	/***
104 	 * An abstract method that splits a line up into tokens. It should parse the
105 	 * line, and call <code>addToken()</code> to add syntax tokens to the token
106 	 * list. Then, it should return the initial token type for the next line.
107 	 * <p>
108 	 * 
109 	 * For example if the current line contains the start of a multiline comment
110 	 * that doesn't end on that line, this method should return the comment token
111 	 * type so that it continues on the next line.
112 	 * 
113 	 * @param token
114 	 *           The initial token type for this line
115 	 * @param line
116 	 *           The line to be tokenized
117 	 * @param lineIndex
118 	 *           The index of the line in the document, starting at 0
119 	 * @return The initial token type for the next line
120 	 */
121 	protected abstract byte markTokensImpl( byte token, Segment line, int lineIndex );
122 
123 	/***
124 	 * Returns if the token marker supports tokens that span multiple lines. If
125 	 * this is true, the object using this token marker is required to pass all
126 	 * lines in the document to the <code>markTokens()</code> method (in turn).
127 	 * <p>
128 	 * 
129 	 * The default implementation returns true; it should be overridden to return
130 	 * false on simpler token markers for increased speed.
131 	 */
132 	public boolean supportsMultilineTokens()
133 	{
134 		return true;
135 	}
136 
137 	/***
138 	 * Informs the token marker that lines have been inserted into the document.
139 	 * This inserts a gap in the <code>lineInfo</code> array.
140 	 * 
141 	 * @param index
142 	 *           The first line number
143 	 * @param lines
144 	 *           The number of lines
145 	 */
146 	public void insertLines( int index, int lines )
147 	{
148 		if( lines <= 0 )
149 			return;
150 		length += lines;
151 		ensureCapacity( length );
152 		int len = index + lines;
153 		System.arraycopy( lineInfo, index, lineInfo, len, lineInfo.length - len );
154 
155 		for( int i = index + lines - 1; i >= index; i-- )
156 		{
157 			lineInfo[i] = new LineInfo();
158 		}
159 	}
160 
161 	/***
162 	 * Informs the token marker that line have been deleted from the document.
163 	 * This removes the lines in question from the <code>lineInfo</code> array.
164 	 * 
165 	 * @param index
166 	 *           The first line number
167 	 * @param lines
168 	 *           The number of lines
169 	 */
170 	public void deleteLines( int index, int lines )
171 	{
172 		if( lines <= 0 )
173 			return;
174 		int len = index + lines;
175 		length -= lines;
176 		System.arraycopy( lineInfo, len, lineInfo, index, lineInfo.length - len );
177 	}
178 
179 	/***
180 	 * Returns the number of lines in this token marker.
181 	 */
182 	public int getLineCount()
183 	{
184 		return length;
185 	}
186 
187 	/***
188 	 * Returns true if the next line should be repainted. This will return true
189 	 * after a line has been tokenized that starts a multiline token that
190 	 * continues onto the next line.
191 	 */
192 	public boolean isNextLineRequested()
193 	{
194 		return nextLineRequested;
195 	}
196 
197 	// protected members
198 
199 	/***
200 	 * The first token in the list. This should be used as the return value from
201 	 * <code>markTokens()</code>.
202 	 */
203 	protected Token firstToken;
204 
205 	/***
206 	 * The last token in the list. New tokens are added here. This should be set
207 	 * to null before a new line is to be tokenized.
208 	 */
209 	protected Token lastToken;
210 
211 	/***
212 	 * An array for storing information about lines. It is enlarged and shrunk
213 	 * automatically by the <code>insertLines()</code> and
214 	 * <code>deleteLines()</code> methods.
215 	 */
216 	protected LineInfo[] lineInfo;
217 
218 	/***
219 	 * The number of lines in the model being tokenized. This can be less than
220 	 * the length of the <code>lineInfo</code> array.
221 	 */
222 	protected int length;
223 
224 	/***
225 	 * The last tokenized line.
226 	 */
227 	protected int lastLine;
228 
229 	/***
230 	 * True if the next line should be painted.
231 	 */
232 	protected boolean nextLineRequested;
233 
234 	/***
235 	 * Creates a new <code>TokenMarker</code>. This DOES NOT create a lineInfo
236 	 * array; an initial call to <code>insertLines()</code> does that.
237 	 */
238 	protected TokenMarker()
239 	{
240 		lastLine = -1;
241 	}
242 
243 	/***
244 	 * Ensures that the <code>lineInfo</code> array can contain the specified
245 	 * index. This enlarges it if necessary. No action is taken if the array is
246 	 * large enough already.
247 	 * <p>
248 	 * 
249 	 * It should be unnecessary to call this under normal circumstances;
250 	 * <code>insertLine()</code> should take care of enlarging the line info
251 	 * array automatically.
252 	 * 
253 	 * @param index
254 	 *           The array index
255 	 */
256 	protected void ensureCapacity( int index )
257 	{
258 		if( lineInfo == null )
259 			lineInfo = new LineInfo[index + 1];
260 		else if( lineInfo.length <= index )
261 		{
262 			LineInfo[] lineInfoN = new LineInfo[( index + 1 ) * 2];
263 			System.arraycopy( lineInfo, 0, lineInfoN, 0, lineInfo.length );
264 			lineInfo = lineInfoN;
265 		}
266 	}
267 
268 	/***
269 	 * Adds a token to the token list.
270 	 * 
271 	 * @param length
272 	 *           The length of the token
273 	 * @param id
274 	 *           The id of the token
275 	 */
276 	protected void addToken( int length, byte id )
277 	{
278 		if( id >= Token.INTERNAL_FIRST && id <= Token.INTERNAL_LAST )
279 			throw new InternalError( "Invalid id: " + id );
280 
281 		if( length == 0 && id != Token.END )
282 			return;
283 
284 		if( firstToken == null )
285 		{
286 			firstToken = new Token( length, id );
287 			lastToken = firstToken;
288 		}
289 		else if( lastToken == null )
290 		{
291 			lastToken = firstToken;
292 			firstToken.length = length;
293 			firstToken.id = id;
294 		}
295 		else if( lastToken.next == null )
296 		{
297 			lastToken.next = new Token( length, id );
298 			lastToken = lastToken.next;
299 		}
300 		else
301 		{
302 			lastToken = lastToken.next;
303 			lastToken.length = length;
304 			lastToken.id = id;
305 		}
306 	}
307 
308 	/***
309 	 * Inner class for storing information about tokenized lines.
310 	 */
311 	public class LineInfo
312 	{
313 		/***
314 		 * Creates a new LineInfo object with token = Token.NULL and obj = null.
315 		 */
316 		public LineInfo()
317 		{
318 		}
319 
320 		/***
321 		 * Creates a new LineInfo object with the specified parameters.
322 		 */
323 		public LineInfo( byte token, Object obj )
324 		{
325 			this.token = token;
326 			this.obj = obj;
327 		}
328 
329 		/***
330 		 * The id of the last token of the line.
331 		 */
332 		public byte token;
333 
334 		/***
335 		 * This is for use by the token marker implementations themselves. It can
336 		 * be used to store anything that is an object and that needs to exist on
337 		 * a per-line basis.
338 		 */
339 		public Object obj;
340 	}
341 }