1
2
3
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.BufferedReader;
7 import java.io.CharArrayReader;
8 import java.io.Closeable;
9 import java.io.IOException;
10 import java.io.PushbackReader;
11 import java.util.Properties;
12
13 import org.apache.commons.io.IOUtils;
14 import org.apache.commons.lang3.RandomStringUtils;
15
16
17
18
19
20
21 public class CsTokenizer implements Tokenizer {
22
23 private boolean ignoreUsings = false;
24
25 public void setProperties(Properties properties) {
26 if (properties.containsKey(IGNORE_USINGS)) {
27 ignoreUsings = Boolean.parseBoolean(properties.getProperty(IGNORE_USINGS, "false"));
28 }
29 }
30
31 @Override
32 public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
33 Tokenizer tokenizer =
34 new Tokenizer(sourceCode.getCodeBuffer().toString());
35 Token token = tokenizer.getNextToken();
36
37 while (!token.equals(Token.EOF)) {
38 Token lookAhead = tokenizer.getNextToken();
39
40
41
42
43
44
45 if (ignoreUsings &&
46 "using".equals(token.image) &&
47 !"(".equals(lookAhead.image)
48 ) {
49
50
51
52 String randomTokenText =
53 RandomStringUtils.randomAlphanumeric(20);
54
55 token = new Token(randomTokenText, token.lineNumber);
56
57 while (!";".equals(lookAhead.image) && !lookAhead.equals(Token.EOF)) {
58 lookAhead = tokenizer.getNextToken();
59 }
60 }
61 if (!";".equals(token.image)) {
62 tokenEntries.add(new TokenEntry(token.image, sourceCode.getFileName(), token.lineNumber));
63 }
64 token = lookAhead;
65 }
66 tokenEntries.add(TokenEntry.getEOF());
67 IOUtils.closeQuietly(tokenizer);
68 }
69
70 public void setIgnoreUsings(boolean ignoreUsings) {
71 this.ignoreUsings = ignoreUsings;
72 }
73
74
75 private static class Tokenizer implements Closeable {
76 private boolean endOfFile;
77 private int line;
78 private final PushbackReader reader;
79
80 public Tokenizer(String sourceCode) {
81 endOfFile = false;
82 line = 1;
83 reader = new PushbackReader(new BufferedReader(new CharArrayReader(sourceCode.toCharArray())));
84 }
85
86 public Token getNextToken() {
87 if (endOfFile) {
88 return Token.EOF;
89 }
90
91 try {
92 int ic = reader.read();
93 char c;
94 StringBuilder b;
95 while (ic != -1) {
96 c = (char) ic;
97 switch (c) {
98
99 case '\n':
100 line++;
101 ic = reader.read();
102 break;
103
104
105 case ' ':
106 case '\t':
107 case '\r':
108 ic = reader.read();
109 break;
110
111 case ';':
112 return new Token(";", line);
113
114
115 case '<':
116 case '>':
117 ic = reader.read();
118 if (ic == '=') {
119 return new Token(c + "=", line);
120 } else if (ic == c) {
121 ic = reader.read();
122 if (ic == '=') {
123 return new Token(c + c + "=", line);
124 } else {
125 reader.unread(ic);
126 return new Token(String.valueOf(c) + c, line);
127 }
128 } else {
129 reader.unread(ic);
130 return new Token(String.valueOf(c), line);
131 }
132
133
134 case '=':
135 case '&':
136 case '|':
137 case '+':
138 case '-':
139 ic = reader.read();
140 if (ic == '=' || ic == c) {
141 return new Token(c + String.valueOf((char) ic), line);
142 } else {
143 reader.unread(ic);
144 return new Token(String.valueOf(c), line);
145 }
146
147
148 case '!':
149 case '*':
150 case '%':
151 case '^':
152 case '~':
153 ic = reader.read();
154 if (ic == '=') {
155 return new Token(c + "=", line);
156 } else {
157 reader.unread(ic);
158 return new Token(String.valueOf(c), line);
159 }
160
161
162 case '"':
163 case '\'':
164 int beginLine = line;
165 b = new StringBuilder();
166 b.append(c);
167 while ((ic = reader.read()) != c) {
168 if (ic == -1) {
169 break;
170 }
171 b.append((char) ic);
172 if (ic == '\\') {
173 int next = reader.read();
174 if (next != -1) {
175 b.append((char) next);
176
177 if (next == '\n') {
178 line++;
179 }
180 }
181 } else if (ic == '\n') {
182 line++;
183 }
184 }
185 if (ic != -1) {
186 b.append((char) ic);
187 }
188 return new Token(b.toString(), beginLine);
189
190
191 case '/':
192 switch (c = (char) (ic = reader.read())) {
193 case '*':
194
195 int state = 1;
196 b = new StringBuilder();
197 b.append("/*");
198
199 while ((ic = reader.read()) != -1) {
200 c = (char) ic;
201 b.append(c);
202
203 if (c == '\n') {
204 line++;
205 }
206
207 if (state == 1) {
208 if (c == '*') {
209 state = 2;
210 }
211 } else {
212 if (c == '/') {
213 ic = reader.read();
214 break;
215 } else if (c != '*') {
216 state = 1;
217 }
218 }
219 }
220
221
222
223 break;
224
225 case '/':
226 b = new StringBuilder();
227 b.append("//");
228 while ((ic = reader.read()) != '\n') {
229 if (ic == -1) {
230 break;
231 }
232 b.append((char) ic);
233 }
234
235
236
237 break;
238
239 case '=':
240 return new Token("/=", line);
241
242 default:
243 reader.unread(ic);
244 return new Token("/", line);
245 }
246 break;
247
248 default:
249
250 if (Character.isJavaIdentifierStart(c)) {
251 b = new StringBuilder();
252 do {
253 b.append(c);
254 c = (char) (ic = reader.read());
255 } while (Character.isJavaIdentifierPart(c));
256 reader.unread(ic);
257 return new Token(b.toString(), line);
258 }
259
260 else if (Character.isDigit(c) || c == '.') {
261 b = new StringBuilder();
262 do {
263 b.append(c);
264 if (c == 'e' || c == 'E') {
265 c = (char) (ic = reader.read());
266 if ("1234567890-".indexOf(c) == -1) {
267 break;
268 }
269 b.append(c);
270 }
271 c = (char) (ic = reader.read());
272 } while ("1234567890.iIlLfFdDsSuUeExX".indexOf(c) != -1);
273 reader.unread(ic);
274 return new Token(b.toString(), line);
275 }
276
277 else {
278 return new Token(String.valueOf(c), line);
279 }
280 }
281 }
282 } catch (IOException e) {
283 e.printStackTrace();
284 }
285 endOfFile = true;
286 return Token.EOF;
287 }
288
289 @Override
290 public void close() throws IOException {
291 reader.close();
292 }
293 }
294
295 private static class Token {
296 public static final Token EOF = new Token("EOF", -1);
297
298 public final String image;
299 public final int lineNumber;
300
301 public Token(String image, int lineNumber) {
302 this.image = image;
303 this.lineNumber = lineNumber;
304 }
305 }
306 }