|
1 |
| |
|
2 |
| |
|
3 |
| |
|
4 |
| |
|
5 |
| package net.sourceforge.pmd.cpd; |
|
6 |
| |
|
7 |
| import java.util.List; |
|
8 |
| |
|
9 |
| public class RubyTokenizer implements Tokenizer { |
|
10 |
| private boolean downcaseString = true; |
|
11 |
| |
|
12 |
0
| public void tokenize(SourceCode tokens, Tokens tokenEntries) {
|
|
13 |
0
| List code = tokens.getCode();
|
|
14 |
0
| for (int i = 0; i < code.size(); i++) {
|
|
15 |
0
| String currentLine = (String) code.get(i);
|
|
16 |
0
| int loc = 0;
|
|
17 |
0
| while (loc < currentLine.length()) {
|
|
18 |
0
| StringBuffer token = new StringBuffer();
|
|
19 |
0
| loc = getTokenFromLine(currentLine, token, loc);
|
|
20 |
0
| if (token.length() > 0 && !isIgnorableString(token.toString())) {
|
|
21 |
0
| if (downcaseString) {
|
|
22 |
0
| token = new StringBuffer(token.toString().toLowerCase());
|
|
23 |
| } |
|
24 |
0
| tokenEntries.add(new TokenEntry(token.toString(),
|
|
25 |
| tokens.getFileName(), |
|
26 |
| i + 1)); |
|
27 |
| } |
|
28 |
| } |
|
29 |
| } |
|
30 |
0
| tokenEntries.add(TokenEntry.getEOF());
|
|
31 |
| } |
|
32 |
| |
|
33 |
0
| private int getTokenFromLine(String line, StringBuffer token, int loc) {
|
|
34 |
0
| for (int j = loc; j < line.length(); j++) {
|
|
35 |
0
| char tok = line.charAt(j);
|
|
36 |
0
| if (!Character.isWhitespace(tok) && !ignoreCharacter(tok)) {
|
|
37 |
0
| if (isComment(tok)) {
|
|
38 |
0
| if (token.length() > 0) {
|
|
39 |
0
| return j;
|
|
40 |
| } else { |
|
41 |
0
| return getCommentToken(line, token, loc);
|
|
42 |
| } |
|
43 |
0
| } else if (isString(tok)) {
|
|
44 |
0
| if (token.length() > 0) {
|
|
45 |
| |
|
46 |
0
| return j;
|
|
47 |
| } else { |
|
48 |
| |
|
49 |
0
| return parseString(line, token, j, tok);
|
|
50 |
| } |
|
51 |
| } else { |
|
52 |
0
| token.append(tok);
|
|
53 |
| } |
|
54 |
| } else { |
|
55 |
0
| if (token.length() > 0) {
|
|
56 |
0
| return j;
|
|
57 |
| } |
|
58 |
| } |
|
59 |
0
| loc = j;
|
|
60 |
| } |
|
61 |
0
| return loc + 1;
|
|
62 |
| } |
|
63 |
| |
|
64 |
0
| private int parseString(String line, StringBuffer token, int loc, char stringType) {
|
|
65 |
0
| boolean escaped = false;
|
|
66 |
0
| boolean done = false;
|
|
67 |
| |
|
68 |
| |
|
69 |
| |
|
70 |
0
| char tok = ' ';
|
|
71 |
0
| while ((loc < line.length()) && !done) {
|
|
72 |
0
| tok = line.charAt(loc);
|
|
73 |
0
| if (escaped && tok == stringType) {
|
|
74 |
| |
|
75 |
0
| escaped = false;
|
|
76 |
0
| } else if (tok == stringType && (token.length() > 0)) {
|
|
77 |
| |
|
78 |
| |
|
79 |
0
| done = true;
|
|
80 |
0
| } else if (tok == '\\') {
|
|
81 |
| |
|
82 |
0
| escaped = true;
|
|
83 |
| } else { |
|
84 |
| |
|
85 |
0
| escaped = false;
|
|
86 |
| } |
|
87 |
| |
|
88 |
0
| token.append(tok);
|
|
89 |
0
| loc++;
|
|
90 |
| } |
|
91 |
0
| return loc + 1;
|
|
92 |
| } |
|
93 |
| |
|
94 |
0
| private boolean ignoreCharacter(char tok) {
|
|
95 |
0
| boolean result = false;
|
|
96 |
0
| switch (tok) {
|
|
97 |
0
| case '{':
|
|
98 |
0
| case '}':
|
|
99 |
0
| case '(':
|
|
100 |
0
| case ')':
|
|
101 |
0
| case ';':
|
|
102 |
0
| case ',':
|
|
103 |
0
| result = true;
|
|
104 |
0
| break;
|
|
105 |
0
| default :
|
|
106 |
0
| result = false;
|
|
107 |
| } |
|
108 |
0
| return result;
|
|
109 |
| } |
|
110 |
| |
|
111 |
0
| private boolean isString(char tok) {
|
|
112 |
0
| boolean result = false;
|
|
113 |
0
| switch (tok) {
|
|
114 |
0
| case '\'':
|
|
115 |
0
| case '"':
|
|
116 |
0
| result = true;
|
|
117 |
0
| break;
|
|
118 |
0
| default:
|
|
119 |
0
| result = false;
|
|
120 |
| } |
|
121 |
0
| return result;
|
|
122 |
| } |
|
123 |
| |
|
124 |
0
| private boolean isComment(char tok) {
|
|
125 |
0
| return tok == '#';
|
|
126 |
| } |
|
127 |
| |
|
128 |
0
| private int getCommentToken(String line, StringBuffer token, int loc) {
|
|
129 |
0
| while (loc < line.length()) {
|
|
130 |
0
| token.append(line.charAt(loc));
|
|
131 |
0
| loc++;
|
|
132 |
| } |
|
133 |
0
| return loc;
|
|
134 |
| } |
|
135 |
| |
|
136 |
0
| private boolean isIgnorableString(String token) {
|
|
137 |
0
| return "do".equals(token) || "end".equals(token);
|
|
138 |
| } |
|
139 |
| } |