|
1 |
| |
|
2 |
| |
|
3 |
| |
|
4 |
| package net.sourceforge.pmd.cpd; |
|
5 |
| |
|
6 |
| import java.util.ArrayList; |
|
7 |
| import java.util.Collections; |
|
8 |
| import java.util.HashMap; |
|
9 |
| import java.util.HashSet; |
|
10 |
| import java.util.Iterator; |
|
11 |
| import java.util.List; |
|
12 |
| import java.util.Map; |
|
13 |
| import java.util.Set; |
|
14 |
| |
|
15 |
| public class MatchCollector { |
|
16 |
| |
|
17 |
| private MatchAlgorithm ma; |
|
18 |
| private Map startMap = new HashMap(); |
|
19 |
| private Map fileMap = new HashMap(); |
|
20 |
| |
|
21 |
2
| public MatchCollector(MatchAlgorithm ma) {
|
|
22 |
2
| this.ma = ma;
|
|
23 |
| } |
|
24 |
| |
|
25 |
8
| public void collect(List marks) {
|
|
26 |
| |
|
27 |
8
| for (int i = 0; i < marks.size() - 1; i++) {
|
|
28 |
12
| TokenEntry mark1 = (TokenEntry) marks.get(i);
|
|
29 |
12
| for (int j = i + 1; j < marks.size(); j++) {
|
|
30 |
16
| TokenEntry mark2 = (TokenEntry) marks.get(j);
|
|
31 |
16
| int diff = mark1.getIndex() - mark2.getIndex();
|
|
32 |
16
| if (-diff < ma.getMinimumTileSize()) {
|
|
33 |
0
| continue;
|
|
34 |
| } |
|
35 |
16
| if (hasPreviousDupe(mark1, mark2)) {
|
|
36 |
12
| continue;
|
|
37 |
| } |
|
38 |
| |
|
39 |
| |
|
40 |
4
| int dupes = countDuplicateTokens(mark1, mark2);
|
|
41 |
4
| if (dupes < ma.getMinimumTileSize()) {
|
|
42 |
0
| continue;
|
|
43 |
| } |
|
44 |
| |
|
45 |
4
| if (diff + dupes >= 1) {
|
|
46 |
0
| continue;
|
|
47 |
| } |
|
48 |
4
| determineMatch(mark1, mark2, dupes);
|
|
49 |
| } |
|
50 |
| } |
|
51 |
| } |
|
52 |
| |
|
53 |
2
| public List getMatches() {
|
|
54 |
2
| List matchList = new ArrayList(startMap.values());
|
|
55 |
2
| Collections.sort(matchList);
|
|
56 |
2
| Set matchSet = new HashSet();
|
|
57 |
2
| Match.MatchCode matchCode = new Match.MatchCode();
|
|
58 |
2
| for (int i = matchList.size(); i > 1; i--) {
|
|
59 |
1
| Match match1 = (Match) matchList.get(i - 1);
|
|
60 |
1
| TokenEntry mark1 = (TokenEntry) match1.getMarkSet().iterator().next();
|
|
61 |
1
| matchSet.clear();
|
|
62 |
1
| matchSet.add(match1.getMatchCode());
|
|
63 |
1
| for (int j = i - 1; j > 0; j--) {
|
|
64 |
2
| Match match2 = (Match) matchList.get(j - 1);
|
|
65 |
2
| if (match1.getTokenCount() != match2.getTokenCount()) {
|
|
66 |
0
| break;
|
|
67 |
| } |
|
68 |
2
| TokenEntry mark2 = null;
|
|
69 |
3
| for (Iterator iter = match2.getMarkSet().iterator(); iter.hasNext();) {
|
|
70 |
3
| mark2 = (TokenEntry) iter.next();
|
|
71 |
3
| if (mark2 != mark1) {
|
|
72 |
2
| break;
|
|
73 |
| } |
|
74 |
| } |
|
75 |
2
| int dupes = countDuplicateTokens(mark1, mark2);
|
|
76 |
2
| if (dupes < match1.getTokenCount()) {
|
|
77 |
0
| break;
|
|
78 |
| } |
|
79 |
2
| matchSet.add(match2.getMatchCode());
|
|
80 |
2
| match1.getMarkSet().addAll(match2.getMarkSet());
|
|
81 |
2
| matchList.remove(i - 2);
|
|
82 |
2
| i--;
|
|
83 |
| } |
|
84 |
1
| if (matchSet.size() == 1) {
|
|
85 |
0
| continue;
|
|
86 |
| } |
|
87 |
| |
|
88 |
1
| Set pruned = match1.getMarkSet();
|
|
89 |
1
| boolean done = false;
|
|
90 |
1
| ArrayList a1 = new ArrayList(match1.getMarkSet());
|
|
91 |
1
| Collections.sort(a1);
|
|
92 |
1
| for (int outer = 0; outer < a1.size() - 1 && !done; outer++) {
|
|
93 |
2
| TokenEntry cmark1 = (TokenEntry) a1.get(outer);
|
|
94 |
2
| for (int inner = outer + 1; inner < a1.size() && !done; inner++) {
|
|
95 |
3
| TokenEntry cmark2 = (TokenEntry) a1.get(inner);
|
|
96 |
3
| matchCode.setFirst(cmark1.getIndex());
|
|
97 |
3
| matchCode.setSecond(cmark2.getIndex());
|
|
98 |
3
| if (!matchSet.contains(matchCode)) {
|
|
99 |
0
| if (pruned.size() > 2) {
|
|
100 |
0
| pruned.remove(cmark2);
|
|
101 |
| } |
|
102 |
0
| if (pruned.size() == 2) {
|
|
103 |
0
| done = true;
|
|
104 |
| } |
|
105 |
| } |
|
106 |
| } |
|
107 |
| } |
|
108 |
| } |
|
109 |
2
| return matchList;
|
|
110 |
| } |
|
111 |
| |
|
112 |
| |
|
113 |
| |
|
114 |
| |
|
115 |
4
| private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
|
|
116 |
4
| Match match = new Match(dupes, mark1, mark2);
|
|
117 |
4
| String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
|
|
118 |
4
| List pairMatches = (ArrayList) fileMap.get(fileKey);
|
|
119 |
4
| if (pairMatches == null) {
|
|
120 |
2
| pairMatches = new ArrayList();
|
|
121 |
2
| fileMap.put(fileKey, pairMatches);
|
|
122 |
| } |
|
123 |
4
| boolean add = true;
|
|
124 |
4
| for (int i = 0; i < pairMatches.size(); i++) {
|
|
125 |
3
| Match other = (Match) pairMatches.get(i);
|
|
126 |
3
| if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
|
|
127 |
| > 0) { |
|
128 |
1
| boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
|
|
129 |
1
| if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
|
|
130 |
| || (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) { |
|
131 |
0
| if (other.getTokenCount() >= match.getTokenCount()) {
|
|
132 |
0
| add = false;
|
|
133 |
0
| break;
|
|
134 |
| } else { |
|
135 |
0
| pairMatches.remove(i);
|
|
136 |
0
| startMap.remove(other.getMatchCode());
|
|
137 |
| } |
|
138 |
| } |
|
139 |
| } |
|
140 |
| } |
|
141 |
4
| if (add) {
|
|
142 |
4
| pairMatches.add(match);
|
|
143 |
4
| startMap.put(match.getMatchCode(), match);
|
|
144 |
| } |
|
145 |
| } |
|
146 |
| |
|
147 |
16
| private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
|
|
148 |
16
| if (mark1.getIndex() == 0) {
|
|
149 |
0
| return false;
|
|
150 |
| } |
|
151 |
16
| return !matchEnded(ma.tokenAt(-1, mark1), ma.tokenAt(-1, mark2));
|
|
152 |
| } |
|
153 |
| |
|
154 |
6
| private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) {
|
|
155 |
6
| int index = 0;
|
|
156 |
6
| while (!matchEnded(ma.tokenAt(index, mark1), ma.tokenAt(index, mark2))) {
|
|
157 |
48
| index++;
|
|
158 |
| } |
|
159 |
6
| return index;
|
|
160 |
| } |
|
161 |
| |
|
162 |
70
| private boolean matchEnded(TokenEntry token1, TokenEntry token2) {
|
|
163 |
70
| return token1.getIdentifier() != token2.getIdentifier() || token1 == TokenEntry.EOF || token2 == TokenEntry.EOF;
|
|
164 |
| } |
|
165 |
| } |