001 /** 002 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html 003 */ 004 package org.sonar.duplications.cpd; 005 006 import java.util.ArrayList; 007 import java.util.Collections; 008 import java.util.HashMap; 009 import java.util.HashSet; 010 import java.util.Iterator; 011 import java.util.List; 012 import java.util.Map; 013 import java.util.Set; 014 015 import net.sourceforge.pmd.cpd.TokenEntry; 016 017 public class MatchCollector { 018 019 private MatchAlgorithm ma; 020 private Map<Match.MatchCode, Match> startMap = new HashMap<Match.MatchCode, Match>(); 021 private Map<String, List<Match>> fileMap = new HashMap<String, List<Match>>(); 022 023 public MatchCollector(MatchAlgorithm ma) { 024 this.ma = ma; 025 } 026 027 public void collect(List<TokenEntry> marks) { 028 // first get a pairwise collection of all maximal matches 029 for (int i = 0; i < marks.size() - 1; i++) { 030 TokenEntry mark1 = marks.get(i); 031 for (int j = i + 1; j < marks.size(); j++) { 032 TokenEntry mark2 = marks.get(j); 033 int diff = mark1.getIndex() - mark2.getIndex(); 034 if ( -diff < ma.getMinimumTileSize()) { 035 continue; 036 } 037 if (hasPreviousDupe(mark1, mark2)) { 038 continue; 039 } 040 041 // "match too small" check 042 int dupes = countDuplicateTokens(mark1, mark2); 043 if (dupes < ma.getMinimumTileSize()) { 044 continue; 045 } 046 // is it still too close together 047 if (diff + dupes >= 1) { 048 continue; 049 } 050 determineMatch(mark1, mark2, dupes); 051 } 052 } 053 } 054 055 @SuppressWarnings("PMD.CompareObjectsWithEquals") 056 public List<Match> getMatches() { 057 List<Match> matchList = new ArrayList<Match>(startMap.values()); 058 Collections.sort(matchList); 059 Set<Match.MatchCode> matchSet = new HashSet<Match.MatchCode>(); 060 Match.MatchCode matchCode = new Match.MatchCode(); 061 for (int i = matchList.size(); i > 1; i--) { 062 Match match1 = matchList.get(i - 1); 063 TokenEntry mark1 = match1.getMarkSet().iterator().next(); 064 matchSet.clear(); 065 matchSet.add(match1.getMatchCode()); 066 for (int j = i - 1; j > 0; j--) { 067 Match match2 = matchList.get(j - 1); 068 if (match1.getTokenCount() != match2.getTokenCount()) { 069 break; 070 } 071 TokenEntry mark2 = null; 072 for (Iterator<TokenEntry> iter = match2.getMarkSet().iterator(); iter.hasNext();) { 073 mark2 = iter.next(); 074 if (mark2 != mark1) { 075 break; 076 } 077 } 078 int dupes = countDuplicateTokens(mark1, mark2); 079 if (dupes < match1.getTokenCount()) { 080 break; 081 } 082 matchSet.add(match2.getMatchCode()); 083 match1.getMarkSet().addAll(match2.getMarkSet()); 084 matchList.remove(i - 2); 085 i--; 086 } 087 if (matchSet.size() == 1) { 088 continue; 089 } 090 // prune the mark set 091 Set<TokenEntry> pruned = match1.getMarkSet(); 092 boolean done = false; 093 ArrayList<TokenEntry> a1 = new ArrayList<TokenEntry>(match1.getMarkSet()); 094 Collections.sort(a1); 095 for (int outer = 0; outer < a1.size() - 1 && !done; outer++) { 096 TokenEntry cmark1 = a1.get(outer); 097 for (int inner = outer + 1; inner < a1.size() && !done; inner++) { 098 TokenEntry cmark2 = a1.get(inner); 099 matchCode.setFirst(cmark1.getIndex()); 100 matchCode.setSecond(cmark2.getIndex()); 101 if ( !matchSet.contains(matchCode)) { 102 if (pruned.size() > 2) { 103 pruned.remove(cmark2); 104 } 105 if (pruned.size() == 2) { 106 done = true; 107 } 108 } 109 } 110 } 111 } 112 return matchList; 113 } 114 115 /** 116 * A greedy algorithm for determining non-overlapping matches 117 */ 118 private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) { 119 Match match = new Match(dupes, mark1, mark2); 120 String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID(); 121 List<Match> pairMatches = fileMap.get(fileKey); 122 if (pairMatches == null) { 123 pairMatches = new ArrayList<Match>(); 124 fileMap.put(fileKey, pairMatches); 125 } 126 boolean add = true; 127 for (int i = 0; i < pairMatches.size(); i++) { 128 Match other = pairMatches.get(i); 129 if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex() > 0) { 130 boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0; 131 if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0)) 132 || ( !ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) { 133 if (other.getTokenCount() >= match.getTokenCount()) { 134 add = false; 135 break; 136 } else { 137 pairMatches.remove(i); 138 startMap.remove(other.getMatchCode()); 139 } 140 } 141 } 142 } 143 if (add) { 144 pairMatches.add(match); 145 startMap.put(match.getMatchCode(), match); 146 } 147 } 148 149 private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) { 150 if (mark1.getIndex() == 0) { 151 return false; 152 } 153 return !matchEnded(ma.tokenAt( -1, mark1), ma.tokenAt( -1, mark2)); 154 } 155 156 private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) { 157 int index = 0; 158 while ( !matchEnded(ma.tokenAt(index, mark1), ma.tokenAt(index, mark2))) { 159 index++; 160 } 161 return index; 162 } 163 164 private boolean matchEnded(TokenEntry token1, TokenEntry token2) { 165 return token1.getIdentifier() != token2.getIdentifier() || token1 == TokenEntry.EOF || token2 == TokenEntry.EOF; 166 } 167 }