diff --git a/util/src/main/java/ds/tree/DuplicateKeyException.java b/util/src/main/java/ds/tree/DuplicateKeyException.java deleted file mode 100644 index 5f660b39..00000000 --- a/util/src/main/java/ds/tree/DuplicateKeyException.java +++ /dev/null @@ -1,41 +0,0 @@ -/* -The MIT License - -Copyright (c) 2008 Tahseen Ur Rehman - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -package ds.tree; - -/** - * excepion thrown if a duplicate key is inserted in a {@link RadixTree} - * - * @author Tahseen Ur Rehman - * email: tahseen.ur.rehman {at.spam.me.not} gmail.com - */ -public class DuplicateKeyException extends RuntimeException -{ - private static final long serialVersionUID = 3141795907493885706L; - - public DuplicateKeyException(String msg) - { - super(msg); - } -} diff --git a/util/src/main/java/ds/tree/RadixTree.java b/util/src/main/java/ds/tree/RadixTree.java deleted file mode 100644 index 7055e049..00000000 --- a/util/src/main/java/ds/tree/RadixTree.java +++ /dev/null @@ -1,115 +0,0 @@ -/* -The MIT License - -Copyright (c) 2008 Tahseen Ur Rehman - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -package ds.tree; - -import java.util.ArrayList; - -/** - * This interface represent the operation of a radix tree. A radix tree, - * Patricia trie/tree, or crit bit tree is a specialized set data structure - * based on the trie that is used to store a set of strings. In contrast with a - * regular trie, the edges of a Patricia trie are labelled with sequences of - * characters rather than with single characters. These can be strings of - * characters, bit strings such as integers or IP addresses, or generally - * arbitrary sequences of objects in lexicographical order. Sometimes the names - * radix tree and crit bit tree are only applied to trees storing integers and - * Patricia trie is retained for more general inputs, but the structure works - * the same way in all cases. - * - * @author Tahseen Ur Rehman - * email: tahseen.ur.rehman {at.spam.me.not} gmail.com - */ -public interface RadixTree { - /** - * Insert a new string key and its value to the tree. - * - * @param key - * The string key of the object - * @param value - * The value that need to be stored corresponding to the given - * key. - * @throws DuplicateKeyException - */ - public void insert(String key, T value); - - /** - * Delete a key and its associated value from the tree. - * @param key The key of the node that need to be deleted - * @return True if the key was deleted, false if not found - */ - public boolean delete(String key); - - /** - * Find a value based on its corresponding key. - * - * @param key The key for which to search the tree. - * @return The value corresponding to the key. null if iot can not find the key - */ - public T find(String key); - - /** - * Find an existing entry and replace it's value. If no existing entry, do nothing - * - * @param key The key for which to search the tree. - * @param value The value to set for the entry - * @return true if an entry was found for the given key, false if not found - */ - public boolean replace(String key, final T value); - - /** - * Check if the tree contains any entry corresponding to the given key. - * - * @param key The key that needto be searched in the tree. - * @return retun true if the key is present in the tree otherwise false - */ - public boolean contains(String key); - - /** - * Search for all the keys that start with given prefix. limiting the results based on the supplied limit. - * - * @param prefix The prefix for which keys need to be search - * @param recordLimit The limit for the results - * @return The list of values those key start with the given prefix - */ - public ArrayList searchPrefix(String prefix, int recordLimit); - - /** - * Return the size of the Radix tree - * @return the size of the tree - */ - public long getSize(); - - /** - * Complete the a prefix to the point where ambiguity starts. - * - * Example: - * If a tree contain "blah1", "blah2" - * complete("b") -> return "blah" - * - * @param prefix The prefix we want to complete - * @return The unambiguous completion of the string. - */ - public String complete(String prefix); -} diff --git a/util/src/main/java/ds/tree/RadixTreeImpl.java b/util/src/main/java/ds/tree/RadixTreeImpl.java deleted file mode 100644 index 017ea877..00000000 --- a/util/src/main/java/ds/tree/RadixTreeImpl.java +++ /dev/null @@ -1,462 +0,0 @@ -/* -The MIT License - -Copyright (c) 2008 Tahseen Ur Rehman, Javid Jamae - -http://code.google.com/p/radixtree/ - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -package ds.tree; - -import java.util.ArrayList; -import java.util.Formattable; -import java.util.Formatter; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.Queue; - -/** - * Implementation for Radix tree {@link RadixTree} - * - * @author Tahseen Ur Rehman (tahseen.ur.rehman {at.spam.me.not} gmail.com) - * @author Javid Jamae - * @author Dennis Heidsiek - */ -public class RadixTreeImpl implements RadixTree, Formattable { - - protected RadixTreeNode root; - - protected long size; - - /** - * Create a Radix Tree with only the default node root. - */ - public RadixTreeImpl() { - root = new RadixTreeNode(); - root.setKey(""); - size = 0; - } - - public T find(String key) { - Visitor visitor = new VisitorImpl() { - - public void visit(String key, RadixTreeNode parent, - RadixTreeNode node) { - if (node.isReal()) - result = node.getValue(); - } - }; - - visit(key, visitor); - - return visitor.getResult(); - } - - public boolean replace(String key, final T value) { - Visitor visitor = new VisitorImpl() { - public void visit(String key, RadixTreeNode parent, RadixTreeNode node) { - if (node.isReal()) { - node.setValue(value); - result = value; - } else { - result = null; - } - } - }; - - visit(key, visitor); - - return visitor.getResult() != null; - } - - public boolean delete(String key) { - Visitor visitor = new VisitorImpl(Boolean.FALSE) { - public void visit(String key, RadixTreeNode parent, - RadixTreeNode node) { - result = node.isReal(); - - // if it is a real node - if (result) { - // If there no children of the node we need to - // delete it from the its parent children list - if (node.getChildern().size() == 0) { - Iterator> it = parent.getChildern() - .iterator(); - while (it.hasNext()) { - if (it.next().getKey().equals(node.getKey())) { - it.remove(); - break; - } - } - - // if parent is not real node and has only one child - // then they need to be merged. - if (parent.getChildern().size() == 1 - && parent.isReal() == false) { - mergeNodes(parent, parent.getChildern().get(0)); - } - } else if (node.getChildern().size() == 1) { - // we need to merge the only child of this node with - // itself - mergeNodes(node, node.getChildern().get(0)); - } else { // we jus need to mark the node as non real. - node.setReal(false); - } - } - } - - /** - * Merge a child into its parent node. Operation only valid if it is - * only child of the parent node and parent node is not a real node. - * - * @param parent - * The parent Node - * @param child - * The child Node - */ - private void mergeNodes(RadixTreeNode parent, - RadixTreeNode child) { - parent.setKey(parent.getKey() + child.getKey()); - parent.setReal(child.isReal()); - parent.setValue(child.getValue()); - parent.setChildern(child.getChildern()); - } - - }; - - visit(key, visitor); - - if(visitor.getResult()) { - size--; - } - return visitor.getResult().booleanValue(); - } - - /* - * (non-Javadoc) - * @see ds.tree.RadixTree#insert(java.lang.String, java.lang.Object) - */ - public void insert(String key, T value) throws DuplicateKeyException { - try { - insert(key, root, value); - } catch (DuplicateKeyException e) { - // re-throw the exception with 'key' in the message - throw new DuplicateKeyException("Duplicate key: '" + key + "'"); - } - size++; - } - - /** - * Recursively insert the key in the radix tree. - * - * @param key The key to be inserted - * @param node The current node - * @param value The value associated with the key - * @throws DuplicateKeyException If the key already exists in the database. - */ - private void insert(String key, RadixTreeNode node, T value) - throws DuplicateKeyException { - - int numberOfMatchingCharacters = node.getNumberOfMatchingCharacters(key); - - // we are either at the root node - // or we need to go down the tree - if (node.getKey().equals("") == true || numberOfMatchingCharacters == 0 || (numberOfMatchingCharacters < key.length() && numberOfMatchingCharacters >= node.getKey().length())) { - boolean flag = false; - String newText = key.substring(numberOfMatchingCharacters, key.length()); - for (RadixTreeNode child : node.getChildern()) { - if (child.getKey().startsWith(newText.charAt(0) + "")) { - flag = true; - insert(newText, child, value); - break; - } - } - - // just add the node as the child of the current node - if (flag == false) { - RadixTreeNode n = new RadixTreeNode(); - n.setKey(newText); - n.setReal(true); - n.setValue(value); - - node.getChildern().add(n); - } - } - // there is a exact match just make the current node as data node - else if (numberOfMatchingCharacters == key.length() && numberOfMatchingCharacters == node.getKey().length()) { - if (node.isReal() == true) { - throw new DuplicateKeyException("Duplicate key"); - } - - node.setReal(true); - node.setValue(value); - } - // This node need to be split as the key to be inserted - // is a prefix of the current node key - else if (numberOfMatchingCharacters > 0 && numberOfMatchingCharacters < node.getKey().length()) { - RadixTreeNode n1 = new RadixTreeNode(); - n1.setKey(node.getKey().substring(numberOfMatchingCharacters, node.getKey().length())); - n1.setReal(node.isReal()); - n1.setValue(node.getValue()); - n1.setChildern(node.getChildern()); - - node.setKey(key.substring(0, numberOfMatchingCharacters)); - node.setReal(false); - node.setChildern(new ArrayList>()); - node.getChildern().add(n1); - - if(numberOfMatchingCharacters < key.length()) { - RadixTreeNode n2 = new RadixTreeNode(); - n2.setKey(key.substring(numberOfMatchingCharacters, key.length())); - n2.setReal(true); - n2.setValue(value); - - node.getChildern().add(n2); - } else { - node.setValue(value); - node.setReal(true); - } - } - // this key need to be added as the child of the current node - else { - RadixTreeNode n = new RadixTreeNode(); - n.setKey(node.getKey().substring(numberOfMatchingCharacters, node.getKey().length())); - n.setChildern(node.getChildern()); - n.setReal(node.isReal()); - n.setValue(node.getValue()); - - node.setKey(key); - node.setReal(true); - node.setValue(value); - - node.getChildern().add(n); - } - } - - public ArrayList searchPrefix(String key, int recordLimit) { - ArrayList keys = new ArrayList(); - - RadixTreeNode node = searchPefix(key, root); - - if (node != null) { - if (node.isReal()) { - keys.add(node.getValue()); - } - getNodes(node, keys, recordLimit); - } - - return keys; - } - - private void getNodes(RadixTreeNode parent, ArrayList keys, int limit) { - Queue> queue = new LinkedList>(); - - queue.addAll(parent.getChildern()); - - while (!queue.isEmpty()) { - RadixTreeNode node = queue.remove(); - if (node.isReal() == true) { - keys.add(node.getValue()); - } - - if (keys.size() == limit) { - break; - } - - queue.addAll(node.getChildern()); - } - } - - private RadixTreeNode searchPefix(String key, RadixTreeNode node) { - RadixTreeNode result = null; - - int numberOfMatchingCharacters = node.getNumberOfMatchingCharacters(key); - - if (numberOfMatchingCharacters == key.length() && numberOfMatchingCharacters <= node.getKey().length()) { - result = node; - } else if (node.getKey().equals("") == true - || (numberOfMatchingCharacters < key.length() && numberOfMatchingCharacters >= node.getKey().length())) { - String newText = key.substring(numberOfMatchingCharacters, key.length()); - for (RadixTreeNode child : node.getChildern()) { - if (child.getKey().startsWith(newText.charAt(0) + "")) { - result = searchPefix(newText, child); - break; - } - } - } - - return result; - } - - public boolean contains(String key) { - Visitor visitor = new VisitorImpl(Boolean.FALSE) { - public void visit(String key, RadixTreeNode parent, - RadixTreeNode node) { - result = node.isReal(); - } - }; - - visit(key, visitor); - - return visitor.getResult().booleanValue(); - } - - /** - * visit the node those key matches the given key - * @param key The key that need to be visited - * @param visitor The visitor object - */ - public void visit(String key, Visitor visitor) { - if (root != null) { - visit(key, visitor, null, root); - } - } - - /** - * recursively visit the tree based on the supplied "key". calls the Visitor - * for the node those key matches the given prefix - * - * @param prefix - * The key o prefix to search in the tree - * @param visitor - * The Visitor that will be called if a node with "key" as its - * key is found - * @param node - * The Node from where onward to search - */ - private void visit(String prefix, Visitor visitor, - RadixTreeNode parent, RadixTreeNode node) { - - int numberOfMatchingCharacters = node.getNumberOfMatchingCharacters(prefix); - - // if the node key and prefix match, we found a match! - if (numberOfMatchingCharacters == prefix.length() && numberOfMatchingCharacters == node.getKey().length()) { - visitor.visit(prefix, parent, node); - } else if (node.getKey().equals("") == true // either we are at the - // root - || (numberOfMatchingCharacters < prefix.length() && numberOfMatchingCharacters >= node.getKey().length())) { // OR we need to - // traverse the childern - String newText = prefix.substring(numberOfMatchingCharacters, prefix.length()); - for (RadixTreeNode child : node.getChildern()) { - // recursively search the child nodes - if (child.getKey().startsWith(newText.charAt(0) + "")) { - visit(newText, visitor, node, child); - break; - } - } - } - } - - public long getSize() { - return size; - } - - /** - * Display the Trie on console. - * - * WARNING! Do not use this for a large Trie, it's for testing purpose only. - * @see #formatTo - */ - @Deprecated - public void display() { - formatNodeTo(new Formatter(System.out), 0, root); - } - - @Deprecated - private void display(int level, RadixTreeNode node) { - formatNodeTo(new Formatter(System.out), level, node); - } - - /** - * WARNING! Do not use this for a large Trie, it's for testing purpose only. - */ - private void formatNodeTo(Formatter f, int level, RadixTreeNode node) { - for (int i = 0; i < level; i++) { - f.format(" "); - } - f.format("|"); - for (int i = 0; i < level; i++) { - f.format("-"); - } - - if (node.isReal() == true) - f.format("%s[%s]*%n", node.getKey(), node.getValue()); - else - f.format("%s%n", node.getKey()); - - for (RadixTreeNode child : node.getChildern()) { - formatNodeTo(f, level + 1, child); - } - } - - /** - * Writes a textual representation of this tree to the given formatter. - * - * Currently, all options are simply ignored. - * - * WARNING! Do not use this for a large Trie, it's for testing purpose only. - */ - public void formatTo(Formatter formatter, int flags, int width, int precision) { - formatNodeTo(formatter, 0, root); - } - - /** - * Complete the a prefix to the point where ambiguity starts. - * - * Example: - * If a tree contain "blah1", "blah2" - * complete("b") -> return "blah" - * - * @param prefix The prefix we want to complete - * @return The unambiguous completion of the string. - */ - public String complete(String prefix) { - return complete(prefix, root, ""); - } - - private String complete(String key, RadixTreeNode node, String base) { - int i = 0; - int keylen = key.length(); - int nodelen = node.getKey().length(); - - while (i < keylen && i < nodelen) { - if (key.charAt(i) != node.getKey().charAt(i)) { - break; - } - i++; - } - - if (i == keylen && i <= nodelen) { - return base + node.getKey(); - } - else if (nodelen == 0 || (i < keylen && i >= nodelen)) { - String beginning = key.substring(0, i); - String ending = key.substring(i, keylen); - for (RadixTreeNode child : node.getChildern()) { - if (child.getKey().startsWith(ending.charAt(0) + "")) { - return complete(ending, child, base + beginning); - } - } - } - - return ""; - } -} \ No newline at end of file diff --git a/util/src/main/java/ds/tree/RadixTreeNode.java b/util/src/main/java/ds/tree/RadixTreeNode.java deleted file mode 100644 index fabdd389..00000000 --- a/util/src/main/java/ds/tree/RadixTreeNode.java +++ /dev/null @@ -1,103 +0,0 @@ -/* -The MIT License - -Copyright (c) 2008 Tahseen Ur Rehman - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -package ds.tree; - -import java.util.ArrayList; -import java.util.List; - -/** - * Represents a node of a Radix tree {@link RadixTreeImpl} - * - * @author Tahseen Ur Rehman - * @email tahseen.ur.rehman {at.spam.me.not} gmail.com - * @param - */ -class RadixTreeNode { - private String key; - - private List> childern; - - private boolean real; - - private T value; - - /** - * intailize the fields with default values to avoid null reference checks - * all over the places - */ - public RadixTreeNode() { - key = ""; - childern = new ArrayList>(); - real = false; - } - - public T getValue() { - return value; - } - - public void setValue(T data) { - this.value = data; - } - - public String getKey() { - return key; - } - - public void setKey(String value) { - this.key = value; - } - - public boolean isReal() { - return real; - } - - public void setReal(boolean datanode) { - this.real = datanode; - } - - public List> getChildern() { - return childern; - } - - public void setChildern(List> childern) { - this.childern = childern; - } - - public int getNumberOfMatchingCharacters(String key) { - int numberOfMatchingCharacters = 0; - while (numberOfMatchingCharacters < key.length() && numberOfMatchingCharacters < this.getKey().length()) { - if (key.charAt(numberOfMatchingCharacters) != this.getKey().charAt(numberOfMatchingCharacters)) { - break; - } - numberOfMatchingCharacters++; - } - return numberOfMatchingCharacters; - } - - @Override - public String toString() { - return key; - } -} diff --git a/util/src/main/java/ds/tree/Visitor.java b/util/src/main/java/ds/tree/Visitor.java deleted file mode 100644 index 34b8d651..00000000 --- a/util/src/main/java/ds/tree/Visitor.java +++ /dev/null @@ -1,57 +0,0 @@ -/* -The MIT License - -Copyright (c) 2008 Tahseen Ur Rehman, Javid Jamae - -http://code.google.com/p/radixtree/ - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -package ds.tree; - -/** - * The visitor interface that is used by {@link RadixTreeImpl} for perfroming - * task on a searched node. - * - * @author Tahseen Ur Rehman (tahseen.ur.rehman {at.spam.me.not} gmail.com) - * @author Javid Jamae - * @author Dennis Heidsiek - * @param - * @param - */ -public interface Visitor { - /** - * This method gets called by {@link RadixTreeImpl#visit(String, Visitor) visit} - * when it finds a node matching the key given to it. - * - * @param key The key that matched the node - * @param parent The parent of the node being visited - * @param node The node that is being visited - */ - public void visit(String key, RadixTreeNode parent, RadixTreeNode node); - - /** - * The visitor can store any type of result object, depending on the context of - * what it is being used for. - * - * @return The result captured by the visitor. - */ - public R getResult(); -} diff --git a/util/src/main/java/ds/tree/VisitorImpl.java b/util/src/main/java/ds/tree/VisitorImpl.java deleted file mode 100644 index 4670ad33..00000000 --- a/util/src/main/java/ds/tree/VisitorImpl.java +++ /dev/null @@ -1,28 +0,0 @@ -package ds.tree; - - -/** - * A simple standard implementation for a {@link Visitor}. - * - * @author Dennis Heidsiek - * @param - * @param - */ -public abstract class VisitorImpl implements Visitor { - - protected R result; - - public VisitorImpl() { - this.result = null; - } - - public VisitorImpl(R initialValue) { - this.result = initialValue; - } - - public R getResult() { - return result; - } - - abstract public void visit(String key, RadixTreeNode parent, RadixTreeNode node); -} \ No newline at end of file diff --git a/util/src/main/java/org/jf/util/ClassFileNameHandler.java b/util/src/main/java/org/jf/util/ClassFileNameHandler.java index 7ac77352..d67836d9 100644 --- a/util/src/main/java/org/jf/util/ClassFileNameHandler.java +++ b/util/src/main/java/org/jf/util/ClassFileNameHandler.java @@ -28,35 +28,62 @@ package org.jf.util; -import ds.tree.RadixTree; -import ds.tree.RadixTreeImpl; +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.Multimap; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.io.*; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.IntBuffer; +import java.util.Collection; import java.util.regex.Pattern; /** - * This class checks for case-insensitive file systems, and generates file names based on a given class name, that are - * guaranteed to be unique. When "colliding" class names are found, it appends a numeric identifier to the end of the - * class name to distinguish it from another class with a name that differes only by case. i.e. a.smali and a_2.smali + * This class handles the complexities of translating a class name into a file name. i.e. dealing with case insensitive + * file systems, windows reserved filenames, class names with extremely long package/class elements, etc. + * + * The types of transformations this class does include: + * - append a '#123' style numeric suffix if 2 physical representations collide + * - replace some number of characters in the middle with a '#' character name if an individual path element is too long + * - append a '#' if an individual path element would otherwise be considered a reserved filename */ public class ClassFileNameHandler { - // we leave an extra 10 characters to allow for a numeric suffix to be added, if it's needed - private static final int MAX_FILENAME_LENGTH = 245; + private static final int MAX_FILENAME_LENGTH = 255; + // How many characters to reserve in the physical filename for numeric suffixes + // Dex files can currently only have 64k classes, so 5 digits plus 1 for an '#' should + // be sufficient to handle the case when every class has a conflicting name + private static final int NUMERIC_SUFFIX_RESERVE = 6; - private PackageNameEntry top; + private final int NO_VALUE = -1; + private final int CASE_INSENSITIVE = 0; + private final int CASE_SENSITIVE = 1; + private int forcedCaseSensitivity = NO_VALUE; + + private DirectoryEntry top; private String fileExtension; private boolean modifyWindowsReservedFilenames; public ClassFileNameHandler(File path, String fileExtension) { - this.top = new PackageNameEntry(path); + this.top = new DirectoryEntry(path); this.fileExtension = fileExtension; this.modifyWindowsReservedFilenames = testForWindowsReservedFileNames(path); } + // for testing + public ClassFileNameHandler(File path, String fileExtension, boolean caseSensitive, + boolean modifyWindowsReservedFilenames) { + this.top = new DirectoryEntry(path); + this.fileExtension = fileExtension; + this.forcedCaseSensitivity = caseSensitive?CASE_SENSITIVE:CASE_INSENSITIVE; + this.modifyWindowsReservedFilenames = modifyWindowsReservedFilenames; + } + + private int getMaxFilenameLength() { + return MAX_FILENAME_LENGTH - NUMERIC_SUFFIX_RESERVE; + } + public File getUniqueFilenameForClass(String className) { //class names should be passed in the normal dalvik style, with a leading L, a trailing ;, and using //'/' as a separator. @@ -71,7 +98,6 @@ public class ClassFileNameHandler { } } - String packageElement; String[] packageElements = new String[packageElementCount]; int elementIndex = 0; int elementStart = 1; @@ -83,18 +109,7 @@ public class ClassFileNameHandler { throw new RuntimeException("Not a valid dalvik class name"); } - packageElement = className.substring(elementStart, i); - - if (modifyWindowsReservedFilenames && isReservedFileName(packageElement)) { - packageElement += "#"; - } - - int utf8Length = utf8Length(packageElement); - if (utf8Length > MAX_FILENAME_LENGTH) { - packageElement = shortenPathComponent(packageElement, utf8Length - MAX_FILENAME_LENGTH); - } - - packageElements[elementIndex++] = packageElement; + packageElements[elementIndex++] = className.substring(elementStart, i); elementStart = ++i; } } @@ -107,19 +122,29 @@ public class ClassFileNameHandler { throw new RuntimeException("Not a valid dalvik class name"); } - packageElement = className.substring(elementStart, className.length()-1); - if (modifyWindowsReservedFilenames && isReservedFileName(packageElement)) { - packageElement += "#"; + packageElements[elementIndex] = className.substring(elementStart, className.length()-1); + + return addUniqueChild(top, packageElements, 0); + } + + @Nonnull + private File addUniqueChild(@Nonnull DirectoryEntry parent, @Nonnull String[] packageElements, + int packageElementIndex) { + if (packageElementIndex == packageElements.length - 1) { + FileEntry fileEntry = new FileEntry(parent, packageElements[packageElementIndex] + fileExtension); + parent.addChild(fileEntry); + + String physicalName = fileEntry.getPhysicalName(); + + // the physical name should be set when adding it as a child to the parent + assert physicalName != null; + + return new File(parent.file, physicalName); + } else { + DirectoryEntry directoryEntry = new DirectoryEntry(parent, packageElements[packageElementIndex]); + directoryEntry = (DirectoryEntry)parent.addChild(directoryEntry); + return addUniqueChild(directoryEntry, packageElements, packageElementIndex+1); } - - int utf8Length = utf8Length(packageElement) + utf8Length(fileExtension); - if (utf8Length > MAX_FILENAME_LENGTH) { - packageElement = shortenPathComponent(packageElement, utf8Length - MAX_FILENAME_LENGTH); - } - - packageElements[elementIndex] = packageElement; - - return top.addUniqueChild(packageElements, 0); } private static int utf8Length(String str) { @@ -167,7 +192,6 @@ public class ClassFileNameHandler { } int midPoint = codePoints.length/2; - int delta = 0; int firstEnd = midPoint; // exclusive int secondStart = midPoint+1; // inclusive @@ -228,166 +252,133 @@ public class ClassFileNameHandler { return false; } - private static Pattern reservedFileNameRegex = Pattern.compile("^CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9]$", + private static Pattern reservedFileNameRegex = Pattern.compile("^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])(\\..*)?$", Pattern.CASE_INSENSITIVE); private static boolean isReservedFileName(String className) { return reservedFileNameRegex.matcher(className).matches(); } private abstract class FileSystemEntry { - public final File file; + @Nullable public final DirectoryEntry parent; + @Nonnull public final String logicalName; + @Nullable protected String physicalName = null; - public FileSystemEntry(File file) { - this.file = file; + private FileSystemEntry(@Nullable DirectoryEntry parent, @Nonnull String logicalName) { + this.parent = parent; + this.logicalName = logicalName; } - public abstract File addUniqueChild(String[] pathElements, int pathElementsIndex); + @Nonnull public String getNormalizedName(boolean preserveCase) { + String elementName = logicalName; + if (!preserveCase && parent != null && !parent.isCaseSensitive()) { + elementName = elementName.toLowerCase(); + } - public FileSystemEntry makeVirtual(File parent) { - return new VirtualGroupEntry(this, parent); + if (modifyWindowsReservedFilenames && isReservedFileName(elementName)) { + elementName = addSuffixBeforeExtension(elementName, "#"); + } + + int utf8Length = utf8Length(elementName); + if (utf8Length > getMaxFilenameLength()) { + elementName = shortenPathComponent(elementName, utf8Length - getMaxFilenameLength()); + } + return elementName; } + + @Nullable + public String getPhysicalName() { + return physicalName; + } + + public void setSuffix(int suffix) { + if (suffix < 0 || suffix > 99999) { + throw new IllegalArgumentException("suffix must be in [0, 100000)"); + } + + if (this.physicalName != null) { + throw new IllegalStateException("The suffix can only be set once"); + } + this.physicalName = makePhysicalName(suffix); + } + + protected abstract String makePhysicalName(int suffix); } - private class PackageNameEntry extends FileSystemEntry { - //this contains the FileSystemEntries for all of this package's children - //the associated keys are all lowercase - private RadixTree children = new RadixTreeImpl(); + private class DirectoryEntry extends FileSystemEntry { + @Nullable private File file = null; + private int caseSensitivity = forcedCaseSensitivity; - public PackageNameEntry(File parent, String name) { - super(new File(parent, name)); + // maps a normalized (but not suffixed) entry name to 1 or more FileSystemEntries. + // Each FileSystemEntry asociated with a normalized entry name must have a distinct + // physical name + private final Multimap children = ArrayListMultimap.create(); + + public DirectoryEntry(@Nonnull File path) { + super(null, path.getName()); + file = path; + physicalName = file.getName(); } - public PackageNameEntry(File path) { - super(path); + public DirectoryEntry(@Nullable DirectoryEntry parent, @Nonnull String logicalName) { + super(parent, logicalName); } - @Override - public synchronized File addUniqueChild(String[] pathElements, int pathElementsIndex) { - String elementName; - String elementNameLower; - - if (pathElementsIndex == pathElements.length - 1) { - elementName = pathElements[pathElementsIndex]; - elementName += fileExtension; - } else { - elementName = pathElements[pathElementsIndex]; - } - elementNameLower = elementName.toLowerCase(); - - FileSystemEntry existingEntry = children.find(elementNameLower); - if (existingEntry != null) { - FileSystemEntry virtualEntry = existingEntry; - //if there is already another entry with the same name but different case, we need to - //add a virtual group, and then add the existing entry and the new entry to that group - if (!(existingEntry instanceof VirtualGroupEntry)) { - if (existingEntry.file.getName().equals(elementName)) { - if (pathElementsIndex == pathElements.length - 1) { - return existingEntry.file; - } else { - return existingEntry.addUniqueChild(pathElements, pathElementsIndex + 1); - } - } else { - virtualEntry = existingEntry.makeVirtual(file); - children.replace(elementNameLower, virtualEntry); + public FileSystemEntry addChild(FileSystemEntry entry) { + String normalizedChildName = entry.getNormalizedName(false); + Collection entries = children.get(normalizedChildName); + if (entry instanceof DirectoryEntry) { + for (FileSystemEntry childEntry: entries) { + if (childEntry.logicalName.equals(entry.logicalName)) { + return childEntry; } } - - return virtualEntry.addUniqueChild(pathElements, pathElementsIndex); } - - if (pathElementsIndex == pathElements.length - 1) { - ClassNameEntry classNameEntry = new ClassNameEntry(file, elementName); - children.insert(elementNameLower, classNameEntry); - return classNameEntry.file; - } else { - PackageNameEntry packageNameEntry = new PackageNameEntry(file, elementName); - children.insert(elementNameLower, packageNameEntry); - return packageNameEntry.addUniqueChild(pathElements, pathElementsIndex + 1); - } - } - } - - /** - * A virtual group that groups together file system entries with the same name, differing only in case - */ - private class VirtualGroupEntry extends FileSystemEntry { - //this contains the FileSystemEntries for all of the files/directories in this group - //the key is the unmodified name of the entry, before it is modified to be made unique (if needed). - private RadixTree groupEntries = new RadixTreeImpl(); - - //whether the containing directory is case sensitive or not. - //-1 = unset - //0 = false; - //1 = true; - private int isCaseSensitive = -1; - - public VirtualGroupEntry(FileSystemEntry firstChild, File parent) { - super(parent); - - //use the name of the first child in the group as-is - groupEntries.insert(firstChild.file.getName(), firstChild); + entry.setSuffix(entries.size()); + entries.add(entry); + return entry; } @Override - public File addUniqueChild(String[] pathElements, int pathElementsIndex) { - String elementName = pathElements[pathElementsIndex]; - - if (pathElementsIndex == pathElements.length - 1) { - elementName = elementName + fileExtension; + protected String makePhysicalName(int suffix) { + if (suffix > 0) { + return getNormalizedName(true) + "." + Integer.toString(suffix); } + return getNormalizedName(true); + } - FileSystemEntry existingEntry = groupEntries.find(elementName); - if (existingEntry != null) { - if (pathElementsIndex == pathElements.length - 1) { - return existingEntry.file; - } else { - return existingEntry.addUniqueChild(pathElements, pathElementsIndex+1); - } - } - - if (pathElementsIndex == pathElements.length - 1) { - String fileName; - if (!isCaseSensitive()) { - fileName = pathElements[pathElementsIndex] + "." + (groupEntries.getSize()+1) + fileExtension; - } else { - fileName = elementName; - } - - ClassNameEntry classNameEntry = new ClassNameEntry(file, fileName); - groupEntries.insert(elementName, classNameEntry); - return classNameEntry.file; - } else { - String fileName; - if (!isCaseSensitive()) { - fileName = pathElements[pathElementsIndex] + "." + (groupEntries.getSize()+1); - } else { - fileName = elementName; - } - - PackageNameEntry packageNameEntry = new PackageNameEntry(file, fileName); - groupEntries.insert(elementName, packageNameEntry); - return packageNameEntry.addUniqueChild(pathElements, pathElementsIndex + 1); + @Override + public void setSuffix(int suffix) { + super.setSuffix(suffix); + String physicalName = getPhysicalName(); + if (parent != null && physicalName != null) { + file = new File(parent.file, physicalName); } } - private boolean isCaseSensitive() { - if (isCaseSensitive != -1) { - return isCaseSensitive == 1; + protected boolean isCaseSensitive() { + if (getPhysicalName() == null || file == null) { + throw new IllegalStateException("Must call setSuffix() first"); + } + + if (caseSensitivity != NO_VALUE) { + return caseSensitivity == CASE_SENSITIVE; } File path = file; - if (path.exists() && path.isFile()) { - path = path.getParentFile(); + if (!path.delete()) { + throw new ExceptionWithContext("Can't delete %s to make it into a directory", + path.getAbsolutePath()); + } } - if ((!file.exists() && !file.mkdirs())) { - return false; + if (!path.exists() && !path.mkdirs()) { + throw new ExceptionWithContext("Couldn't create directory %s", path.getAbsolutePath()); } try { boolean result = testCaseSensitivity(path); - isCaseSensitive = result?1:0; + caseSensitivity = result?CASE_SENSITIVE:CASE_INSENSITIVE; return result; } catch (IOException ex) { return false; @@ -447,22 +438,34 @@ public class ClassFileNameHandler { try { f2.delete(); } catch (Exception ex) {} } } + } + + private class FileEntry extends FileSystemEntry { + private FileEntry(@Nullable DirectoryEntry parent, @Nonnull String logicalName) { + super(parent, logicalName); + } @Override - public FileSystemEntry makeVirtual(File parent) { - return this; + protected String makePhysicalName(int suffix) { + if (suffix > 0) { + return addSuffixBeforeExtension(getNormalizedName(true), '.' + Integer.toString(suffix)); + } + return getNormalizedName(true); } } - private class ClassNameEntry extends FileSystemEntry { - public ClassNameEntry(File parent, String name) { - super(new File(parent, name)); - } + private static String addSuffixBeforeExtension(String pathElement, String suffix) { + int extensionStart = pathElement.lastIndexOf('.'); - @Override - public File addUniqueChild(String[] pathElements, int pathElementsIndex) { - assert false; - return file; + StringBuilder newName = new StringBuilder(pathElement.length() + suffix.length() + 1); + if (extensionStart < 0) { + newName.append(pathElement); + newName.append(suffix); + } else { + newName.append(pathElement.subSequence(0, extensionStart)); + newName.append(suffix); + newName.append(pathElement.subSequence(extensionStart, pathElement.length())); } + return newName.toString(); } } diff --git a/util/src/test/java/org/jf/util/ClassFileNameHandlerTest.java b/util/src/test/java/org/jf/util/ClassFileNameHandlerTest.java index e3dfd154..125fbd2f 100644 --- a/util/src/test/java/org/jf/util/ClassFileNameHandlerTest.java +++ b/util/src/test/java/org/jf/util/ClassFileNameHandlerTest.java @@ -31,9 +31,12 @@ package org.jf.util; +import com.google.common.base.Strings; +import com.google.common.io.Files; import junit.framework.Assert; import org.junit.Test; +import java.io.File; import java.nio.charset.Charset; public class ClassFileNameHandlerTest { @@ -91,6 +94,7 @@ public class ClassFileNameHandlerTest { Assert.assertEquals(98, result.length()); } + @Test public void test4ByteEncodings() { StringBuilder sb = new StringBuilder(); for (int i=0x10000; i<0x10000+100; i++) { @@ -101,12 +105,141 @@ public class ClassFileNameHandlerTest { String result = ClassFileNameHandler.shortenPathComponent(sb.toString(), 8); Assert.assertEquals(400, sb.toString().getBytes(UTF8).length); Assert.assertEquals(389, result.getBytes(UTF8).length); - Assert.assertEquals(98, result.length()); + Assert.assertEquals(195, result.length()); - // we remove 3 codepoints == 6 characters == 12 bytes, and then add back in the 1-byte '#' + // we remove 2 codepoints == 4 characters == 8 bytes, and then add back in the 1-byte '#' result = ClassFileNameHandler.shortenPathComponent(sb.toString(), 7); Assert.assertEquals(400, sb.toString().getBytes(UTF8).length); - Assert.assertEquals(3892, result.getBytes(UTF8).length); - Assert.assertEquals(98, result.length()); + Assert.assertEquals(393, result.getBytes(UTF8).length); + Assert.assertEquals(197, result.length()); + } + + @Test + public void testMultipleLongNames() { + String filenameFragment = Strings.repeat("a", 512); + + File tempDir = Files.createTempDir(); + ClassFileNameHandler handler = new ClassFileNameHandler(tempDir, ".smali"); + + // put the differentiating character in the middle, where it will get stripped out by the filename shortening + // logic + File file1 = handler.getUniqueFilenameForClass("La/a/" + filenameFragment + "1" + filenameFragment + ";"); + checkFilename(tempDir, file1, "a", "a", Strings.repeat("a", 124) + "#" + Strings.repeat("a", 118) + ".smali"); + + File file2 = handler.getUniqueFilenameForClass("La/a/" + filenameFragment + "2" + filenameFragment + ";"); + checkFilename(tempDir, file2, "a", "a", Strings.repeat("a", 124) + "#" + Strings.repeat("a", 118) + ".1.smali"); + + Assert.assertFalse(file1.getAbsolutePath().equals(file2.getAbsolutePath())); + } + + @Test + public void testBasicFunctionality() { + File tempDir = Files.createTempDir(); + ClassFileNameHandler handler = new ClassFileNameHandler(tempDir, ".smali"); + + File file = handler.getUniqueFilenameForClass("La/b/c/d;"); + checkFilename(tempDir, file, "a", "b", "c", "d.smali"); + + file = handler.getUniqueFilenameForClass("La/b/c/e;"); + checkFilename(tempDir, file, "a", "b", "c", "e.smali"); + + file = handler.getUniqueFilenameForClass("La/b/d/d;"); + checkFilename(tempDir, file, "a", "b", "d", "d.smali"); + + file = handler.getUniqueFilenameForClass("La/b;"); + checkFilename(tempDir, file, "a", "b.smali"); + + file = handler.getUniqueFilenameForClass("Lb;"); + checkFilename(tempDir, file, "b.smali"); + } + + @Test + public void testCaseInsensitiveFilesystem() { + File tempDir = Files.createTempDir(); + ClassFileNameHandler handler = new ClassFileNameHandler(tempDir, ".smali", false, false); + + File file = handler.getUniqueFilenameForClass("La/b/c;"); + checkFilename(tempDir, file, "a", "b", "c.smali"); + + file = handler.getUniqueFilenameForClass("La/b/C;"); + checkFilename(tempDir, file, "a", "b", "C.1.smali"); + + file = handler.getUniqueFilenameForClass("La/B/c;"); + checkFilename(tempDir, file, "a", "B.1", "c.smali"); + } + + @Test + public void testCaseSensitiveFilesystem() { + File tempDir = Files.createTempDir(); + ClassFileNameHandler handler = new ClassFileNameHandler(tempDir, ".smali", true, false); + + File file = handler.getUniqueFilenameForClass("La/b/c;"); + checkFilename(tempDir, file, "a", "b", "c.smali"); + + file = handler.getUniqueFilenameForClass("La/b/C;"); + checkFilename(tempDir, file, "a", "b", "C.smali"); + + file = handler.getUniqueFilenameForClass("La/B/c;"); + checkFilename(tempDir, file, "a", "B", "c.smali"); + } + + @Test + public void testWindowsReservedFilenames() { + File tempDir = Files.createTempDir(); + ClassFileNameHandler handler = new ClassFileNameHandler(tempDir, ".smali", false, true); + + File file = handler.getUniqueFilenameForClass("La/con/c;"); + checkFilename(tempDir, file, "a", "con#", "c.smali"); + + file = handler.getUniqueFilenameForClass("La/Con/c;"); + checkFilename(tempDir, file, "a", "Con#.1", "c.smali"); + + file = handler.getUniqueFilenameForClass("La/b/PRN;"); + checkFilename(tempDir, file, "a", "b", "PRN#.smali"); + + file = handler.getUniqueFilenameForClass("La/b/prN;"); + checkFilename(tempDir, file, "a", "b", "prN#.1.smali"); + + file = handler.getUniqueFilenameForClass("La/b/com0;"); + checkFilename(tempDir, file, "a", "b", "com0.smali"); + + for (String reservedName: new String[] {"con", "prn", "aux", "nul", "com1", "com9", "lpt1", "lpt9"}) { + file = handler.getUniqueFilenameForClass("L" + reservedName + ";"); + checkFilename(tempDir, file, reservedName +"#.smali"); + } + } + + @Test + public void testIgnoringWindowsReservedFilenames() { + File tempDir = Files.createTempDir(); + ClassFileNameHandler handler = new ClassFileNameHandler(tempDir, ".smali", true, false); + + File file = handler.getUniqueFilenameForClass("La/con/c;"); + checkFilename(tempDir, file, "a", "con", "c.smali"); + + file = handler.getUniqueFilenameForClass("La/Con/c;"); + checkFilename(tempDir, file, "a", "Con", "c.smali"); + + file = handler.getUniqueFilenameForClass("La/b/PRN;"); + checkFilename(tempDir, file, "a", "b", "PRN.smali"); + + file = handler.getUniqueFilenameForClass("La/b/prN;"); + checkFilename(tempDir, file, "a", "b", "prN.smali"); + + file = handler.getUniqueFilenameForClass("La/b/com0;"); + checkFilename(tempDir, file, "a", "b", "com0.smali"); + + for (String reservedName: new String[] {"con", "prn", "aux", "nul", "com1", "com9", "lpt1", "lpt9"}) { + file = handler.getUniqueFilenameForClass("L" + reservedName + ";"); + checkFilename(tempDir, file, reservedName +".smali"); + } + } + + private void checkFilename(File base, File file, String... elements) { + for (int i=elements.length-1; i>=0; i--) { + Assert.assertEquals(elements[i], file.getName()); + file = file.getParentFile(); + } + Assert.assertEquals(base.getAbsolutePath(), file.getAbsolutePath()); } }