Saturday, July 23, 2016

Parsing a list of words and searching for most common occurences

package test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import org.junit.Assert;

public class KthOccurence {
    public static void main(String[] args) {

        List<String> resultA = new KthOccurence().occurences(4, new ArrayList<String>());
        System.err.println("resultA:"+resultA);
        Assert.assertEquals(resultA, Arrays.asList(nullnullnullnull));
        
        List<String> resultB = new KthOccurence().occurences(4, Arrays.asList("My","My","House","House","Time","Time","Time","Jeep","Jeep","Jeep","Jeep","Jeep","Jeep","Jeep","Jeep","Cat"));
        System.err.println("resultB:"+resultB);
        Assert.assertEquals(resultB, Arrays.asList("My","House","Time","Jeep"));
        
        List<String> resultC = new KthOccurence().occurences(1, Arrays.asList("My","Test","My","My","Test","My","Hello","Dog","Hello"));
        System.err.println("resultC:"+resultC);
        Assert.assertEquals(resultC, Arrays.asList("My"));
        
        List<String> resultD = new KthOccurence().occurences(2, Arrays.asList("My","Test","My","My","Test","My","Hello","Dog","Hello"));
        System.err.println("resultD:"+resultD);
        Assert.assertEquals(resultD, Arrays.asList("Hello","My"));

        List<String> resultE = new KthOccurence().occurences(3, Arrays.asList("My","Test","My","My","Test","My","Hello","Dog","Hello"));
        System.err.println("resultE:"+resultE);
        Assert.assertEquals(resultE, Arrays.asList("Test","Hello","My"));
    }
    
    /**
     * Example: Find the 50 most common words from a book
     * @param K - Number of distinct words to return with highest redundancy
     * @param words - List of words with repeated words
     * @return - Ordered list of highest repeated words
     */

    public List<String> occurences(Integer K, List<String> words) {
        Map<String, Integer> countMap = new HashMap<String, Integer>();
        
        LinkedList<String> trackedList = new LinkedList<String>(); // smallest to largest count in list
        for(int i=0; i<K; i++) {
            trackedList.add(null);
        }
        countMap.put(null, 0);

        for(String currentWord: words) {
            Integer currentCount = countMap.get(currentWord);
            if(currentCount == null) {
                currentCount = 1;
            } else {
                currentCount++;
            }
            countMap.put(currentWord, currentCount);
            
            ListIterator<String> iterator = trackedList.listIterator();
            while(iterator.hasNext()) { // O(K) to walk through linked list
                String nextIteration = iterator.next();
                
                if(currentCount < countMap.get(nextIteration)) { // If 
                    iterator.previous();
                    break;
                }
                
                if(currentWord.equals(nextIteration)) { // if come across word, remove it
                    iterator.remove(); // O(1) to remove from linked list
                }
            }
            // location of iterator is insertion point
            iterator.add(currentWord); // O(1) to add to linked list
            if(trackedList.size() > K) { // An insertion occurred without a remove
                trackedList.removeFirst(); // O(1) to remove first from linked list
            }
        }
        
        return trackedList;
    }
}

No comments:

Post a Comment