Wednesday, July 4, 2012

Java Trie

After receiving so many visits to my early C# Trie post, I am now releasing source code for a Java Trie, which works in exactly the same way as the C# Trie. The comments have been fixed (I discovered lots of incomplete comments) and some optimizations have been implemented. The code will just be pasted in this post rather than placed in a repository like the C# Trie was. More information can be found in the C# Trie post, but notice that IPrefixMatcher was renamed to PrefixMatcher and PrefixMatcher was renamed to DefaultPrefixMatcher.

Trie class
/*
 * Copyright (c) 2012 Marc Tanti (www.marctanti.com)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
package trie;

/**
 * Trie data structure which maps strings and string prefixes to values.
 * In this implementation, only one PrefixMatcher can be used but it can be modified to use a list of concurrent prefix matchers.
 * @param <V> Type of values to map to.
 */
public class Trie <V> {

    /**
     * The root of the trie.
     */
    private TrieNode<V> root;

    /**
     * PrefixMatcher object for following the prefix of the strings in the trie to their associated values.
     */
    private PrefixMatcher<V> prefixMatcher;

    /**
     * Create an empty trie with an empty root node and an unused prefix matcher.
     */
    public Trie()
    {
        this.root = new TrieNode<V>(null, '\0');
        this.prefixMatcher = new DefaultPrefixMatcher<V>(this.root);
    }

    /**
     * Get the prefix matcher to search for strings with in a character by character basis.
     * @return The prefix matcher. WARNING: There is only one prefix matcher per trie and calling this method will always return the same instance. The instance will retain its previous state.
     */
    public PrefixMatcher<V> getPrefixMatcher() {
        return this.prefixMatcher;
    }

    /**
     * Put a new key-value pair, overwriting the existing value if the given key is already in use.
     * @param key The full string which is associated with the value.
     * @param value The value which is associated with the key.
     */
    public void put(String key, V value)
    {
        TrieNode<V> node = this.root;
        for (char c : key.toCharArray())
        {
            node = node.addChild(c);
        }
        node.setValue(value);
    }

    /**
     * Remove the value that a key leads to and any redundant nodes which result from this deletion.
     * Clears the current matching process.
     * @param key Key of the value to remove.
     */
    public void remove(String key)
    {
        TrieNode<V> node = this.root;
        for (char c : key.toCharArray())
        {
            node = node.getChild(c);
        }
        node.setValue(null);

        //Remove all ancestor nodes which lead to this null value.
        while (node != this.root && !node.isTerminater() && node.numChildren() == 1)
        {
            char prevKey = node.getKey();
            node = node.getParent();
            node.removeChild(prevKey);
        }

        this.prefixMatcher.resetMatch();
    }
    
}

TrieNode class
/*
 * Copyright (c) 2012 Marc Tanti (www.marctanti.com)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
package trie;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

/**
 * Node of the trie.
 * Stores a link to multiple children of type TrieNode, each associated with a key of type Character.
 * If the node terminates a string then it must also store a non-null value of type V.
 * @param <V> The type of the value associated with a complete string.
 */
public class TrieNode <V> {
    
    /**
     * The value stored by this node. If not null then the node terminates a string.
     */
    private V value;
    
    /**
     * The key which is associated with this node, that is, the character that led to this node.
     */
    private char key;
    
    /**
     * The parent of this node.
     */
    private TrieNode<V> parent;
    
    /**
     * The children of this node which can be found through their associated character.
     */
    private HashMap<Character, TrieNode<V>> children;

    /**
     * Create an empty node with no children and null value.
     * @param key The key which is associated with this node, that is, the character that led to this node.
     * @param parent The parent node of this node.
     */
    public TrieNode(TrieNode<V> parent, char key) {
        this.key = key;
        this.parent = parent;
        this.value = null;
        this.children = new HashMap<Character, TrieNode<V>>();
    }

    /**
     * Get the value stored by this node. If the value is not null then the node terminates a string.
     * @return The value stored by this node.
     */
    public V getValue() {
        return value;
    }
    
    /**
     * Set the value stored by this node. If the value is not null then the node terminates a string.
     * @param value The value stored by this node.
     */
    public void setValue(V value) {
        this.value = value;
    }

    /**
     * Get the key which is associated with this node, that is, the character that led to this node.
     * @return The key which is associated with this node.
     */
    public char getKey() {
        return key;
    }

    /**
     * Get the parent of this node.
     * @return The parent of this node.
     */
    public TrieNode<V> getParent() {
        return parent;
    }
    
    /**
     * Get a child of this node which is associated with a key.
     * @param key Key associated with the child of interest.
     * @return The child or null if no child is associated with the given key.
     */
    public TrieNode<V> getChild(char key) {
        if (this.children.containsKey(key)) {
            return this.children.get(key);
        }
        return null;
    }

    /**
     * Check whether or not this node terminates a string and stores a value.
     * @return Whether node stores a value.
     */
    public boolean isTerminater() {
        return this.value != null;
    }

    /**
     * Get the number of children that this node has.
     * @return The number of children.
     */
    public int numChildren() {
        return this.children.size();
    }

    /**
     * Check whether or not this node has any children.
     * @return True if node does not have children, false otherwise.
     */
    public boolean isLeaf() {
        return this.numChildren() == 0;
    }

    /**
     * Check whether or not one of the children of this node is associated with the given key.
     * @param key The key to check for.
     * @return True if a child with given key exists, false otherwise.
     */
    public boolean containsKey(char key) {
        return this.children.containsKey(key);
    }

    /**
     * Add an empty child node (associated with a key) to this node and return the node.
     * @param key The key to associate with the empty child node.
     * @return If the given key already exists then return the existing child node, otherwise return the new child node.
     */
    public TrieNode<V> addChild(char key) {
        if (this.children.containsKey(key)) {
            return this.children.get(key);
        } else {
            TrieNode<V> newChild = new TrieNode<V>(this, key);
            this.children.put(key, newChild);
            return newChild;
        }
    }

    /**
     * Remove the child of a node associated with a key along with all its descendents.
     * @param key The key associated with the child to remove.
     */
    public void removeChild(char key) {
        this.children.remove(key);
    }

    /**
     * Get a list of values contained in this node and all its descendants.
     * Since all the descendants share the same string prefix as this node, then all the values returned will belong to all the strings with this node's prefix.
     * @return A List of values.
     */
    public List<V> prefixMatches() {
        ArrayList<V> values = new ArrayList<V>();
        
        for (TrieNode <V> child : this.children.values()) {
            values.addAll(child.prefixMatches());
        }

        if (this.isTerminater()) {
            values.add(this.value);
        }

        return values;
    }
}

DefaultPrefixMatcher class
/*
 * Copyright (c) 2012 Marc Tanti (www.marctanti.com)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
package trie;

import java.util.List;

/**
 * This class is used to search through a trie for all the strings which have a given prefix. The prefix will be entered character by character.
 * The object from this class is used as a state to mark where the currently entered prefixed has arrived in the trie thus far.
 * This class makes it possible to have a number of matcher objects all concurrently following prefixes in the trie.
 * @param <V> The type of the value associated with a complete string.
 */
public class DefaultPrefixMatcher <V> implements PrefixMatcher<V> {

    /**
     * The root of the trie being searched.
     */
    private TrieNode<V> root;

    /**
     * The trie node that the prefix entered thus far has led to.
     */
    private TrieNode<V> currNode;

    /**
     * The full prefix entered thus far.
     */
    private StringBuffer prefix;
    
    /**
     * Create a matcher, associating it to the trie to search in.
     * @param root Root node of the trie which the matcher will search in.
     */
    public DefaultPrefixMatcher(TrieNode<V> root)
    {
        this.root = root;
        this.currNode = root;
        this.prefix = new StringBuffer();
    }
    
    /**
     * Get the prefix entered so far.
     * @return The prefix entered so far.
     */
    @Override
    public String getPrefix() {
        return this.prefix.toString();
    }

    /**
     * Clear the currently entered prefix.
     */
    @Override
    public void resetMatch() {
        this.currNode = this.root;
        this.prefix = new StringBuffer();
    }
    
    /**
     * Remove the last character of the currently entered prefix.
     */
    @Override
    public void backMatch() {
        if (this.currNode != this.root)
        {
            this.currNode = this.currNode.getParent();
            this.prefix.deleteCharAt(this.prefix.length() - 1);
        }
    }
    
    /**
     * Get the last character of the currently entered prefix.
     * @return The last character of the currently entered prefix.
     */
    @Override
    public char lastMatch() {
        return this.currNode.getKey();
    }
    
    /**
     * Add another character to the end of the prefix if it leads to some existing strings in the trie.
     * If no strings have a matching prefix, character will not be added.
     * @param next Next character in the prefix.
     * @return True if the new prefix was a prefix to some strings in the trie, false otherwise.
     */
    @Override
    public boolean nextMatch(char next) {
        if (this.currNode.containsKey(next))
        {
            this.currNode = this.currNode.getChild(next);
            this.prefix.append(next);
            return true;
        }
        return false;
    }
    
    /**
     * Get all the values associated with strings that share the currently entered prefix.
     * @return The list of values.
     */
    @Override
    public List<V> getPrefixMatches() {
        return this.currNode.prefixMatches();
    }
    
    /**
     * Check if the currently entered prefix is a complete existing string in the trie.
     * @return True if the currently entered prefix is an existing string, false otherwise.
     */
    @Override
    public boolean isExactMatch() {
        return this.currNode.isTerminater();
    }
    
    /**
     * Get the value associate with the complete string that equals the prefix entered thus far.
     * @return The value if the current entered prefix is an existing string, null otherwise.
     */
    @Override
    public V getExactMatch() {
        if (this.isExactMatch()) {
            return this.currNode.getValue();
        }
        return null;
    }
    
}

PrefixMatcher interface
/*
 * Copyright (c) 2012 Marc Tanti (www.marctanti.com)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
package trie;

import java.util.List;

/**
 * A simplified view of the concrete prefix matcher class for the user.
 * This is used to search through a trie for all the strings which have a given prefix. The prefix will be entered character by character.
 * @param <V> The type of the value associated with a complete string.
 */
public interface PrefixMatcher<V> {
    
    /**
     * Get the prefix entered so far.
     * @return The prefix entered so far.
     */
    String getPrefix();
    
    /**
     * Clear the currently entered prefix.
     */
    void resetMatch();
    
    /**
     * Remove the last character of the currently entered prefix.
     */
    void backMatch();
    
    /**
     * Get the last character of the currently entered prefix.
     * @return The last character of the currently entered prefix.
     */
    char lastMatch();
    
    /**
     * Add another character to the end of the prefix if it leads to some existing strings in the trie.
     * If no strings have a matching prefix, character will not be added.
     * @param next Next character in the prefix.
     * @return True if the new prefix was a prefix to some strings in the trie, false otherwise.
     */
    boolean nextMatch(char next);
    
    /**
     * Get all the values associated with strings that share the currently entered prefix.
     * @return The list of values.
     */
    List<V> getPrefixMatches();
    
    /**
     * Check if the currently entered prefix is a complete existing string in the trie.
     * @return True if the currently entered prefix is an existing string, false otherwise.
     */
    boolean isExactMatch();
    
    /**
     * Get the value associate with the complete string that equals the prefix entered thus far.
     * @return The value.
     */
    V getExactMatch();
    
}

Example
//Create trie
Trie<String> trie = new Trie<String>();

//Add some key-value pairs to the trie
trie.put("James", "1");
trie.put("Jake", "2");
trie.put("Fred", "3");

//Search the trie
trie.getPrefixMatcher().nextMatch('J'); //Prefix thus far: "J"
trie.getPrefixMatcher().getPrefixMatches(); //[1, 2]
trie.getPrefixMatcher().isExactMatch(); //false
trie.getPrefixMatcher().nextMatch('a');
trie.getPrefixMatcher().nextMatch('m'); //Prefix thus far: "Jam"
trie.getPrefixMatcher().getPrefixMatches(); //[1]
trie.getPrefixMatcher().nextMatch('e');
trie.getPrefixMatcher().nextMatch('s'); //Prefix thus far: "James"
trie.getPrefixMatcher().isExactMatch(); //true
trie.getPrefixMatcher().getExactMatch(); //1

//Remove a string-value pair
trie.remove("James");