Saturday, July 23, 2016

Find longest sublist of distinct words

package test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.junit.Assert;

public class LongestDistinctWords {
    public static void main(String[] args) {

        List<String> resultA = new LongestDistinctWords().occurences(new ArrayList<String>());
        System.err.println("resultA:"+resultA);
        Assert.assertEquals(resultA, Arrays.asList());
        
        List<String> resultB = new LongestDistinctWords().occurences(Arrays.asList("My","My","House","House","Time","Time","Time","Jeep","Jeep","Jeep","Jeep","Jeep","Jeep","Jeep","Jeep","Cat"));
        System.err.println("resultB:"+resultB);
        Assert.assertEquals(resultB, Arrays.asList("My","House"));
        
        List<String> resultC = new LongestDistinctWords().occurences(Arrays.asList("My","Test","My","My","Test","My","Hello","Dog","Hello"));
        System.err.println("resultC:"+resultC);
        Assert.assertEquals(resultC, Arrays.asList("Test""My""Hello""Dog"));
        
        List<String> resultD = new LongestDistinctWords().occurences(Arrays.asList("My","Test","My","Test","Hello","My","Test","Hello","Dog"));
        System.err.println("resultD:"+resultD);
        Assert.assertEquals(resultD, Arrays.asList("My","Test","Hello","Dog"));

        List<String> resultE = new LongestDistinctWords().occurences(Arrays.asList("My","My","And","My","And","My","Hi","How","Are","Hi","Yes","We"));
        System.err.println("resultE:"+resultE);
        Assert.assertEquals(resultE, Arrays.asList("And","My","Hi","How","Are"));

        List<String> resultF = new LongestDistinctWords().occurences(Arrays.asList("My","My","And","My","And","My","Hi","How","Are","Hi","My","We","Can","See"));
        System.err.println("resultF:"+resultF);
        Assert.assertEquals(resultF, Arrays.asList("How","Are","Hi","My","We","Can","See"));
    }
    
    /**
     * Find longest sublist of distinct words
     * @param words
     * @return
     */

    public List<String> occurences(List<String> words) {
        Integer longestStart = 0;
        Integer longestEnd = 0;
        Map<String, Integer> indexAfterWordLastSeenMap = new HashMap<String, Integer>();
        Integer currentStart = 0;
        Integer currentEnd = 0;
        for(String word: words) {
            Integer lastSeen = indexAfterWordLastSeenMap.get(word);
            if(lastSeen != null && lastSeen > currentStart) {
                if(currentEnd - currentStart > longestEnd - longestStart) {
                    longestStart = currentStart;
                    longestEnd = currentEnd;
                }
                currentStart = lastSeen;
            } 
            currentEnd++;
            indexAfterWordLastSeenMap.put(word, currentEnd);
        }
        if(currentEnd - currentStart > longestEnd - longestStart) {
            longestStart = currentStart;
            longestEnd = currentEnd;
        }
        return words.subList(longestStart, longestEnd);
    }
}

Parsing a list of words and searching for most common occurences

package test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import org.junit.Assert;

public class KthOccurence {
    public static void main(String[] args) {

        List<String> resultA = new KthOccurence().occurences(4, new ArrayList<String>());
        System.err.println("resultA:"+resultA);
        Assert.assertEquals(resultA, Arrays.asList(nullnullnullnull));
        
        List<String> resultB = new KthOccurence().occurences(4, Arrays.asList("My","My","House","House","Time","Time","Time","Jeep","Jeep","Jeep","Jeep","Jeep","Jeep","Jeep","Jeep","Cat"));
        System.err.println("resultB:"+resultB);
        Assert.assertEquals(resultB, Arrays.asList("My","House","Time","Jeep"));
        
        List<String> resultC = new KthOccurence().occurences(1, Arrays.asList("My","Test","My","My","Test","My","Hello","Dog","Hello"));
        System.err.println("resultC:"+resultC);
        Assert.assertEquals(resultC, Arrays.asList("My"));
        
        List<String> resultD = new KthOccurence().occurences(2, Arrays.asList("My","Test","My","My","Test","My","Hello","Dog","Hello"));
        System.err.println("resultD:"+resultD);
        Assert.assertEquals(resultD, Arrays.asList("Hello","My"));

        List<String> resultE = new KthOccurence().occurences(3, Arrays.asList("My","Test","My","My","Test","My","Hello","Dog","Hello"));
        System.err.println("resultE:"+resultE);
        Assert.assertEquals(resultE, Arrays.asList("Test","Hello","My"));
    }
    
    /**
     * Example: Find the 50 most common words from a book
     * @param K - Number of distinct words to return with highest redundancy
     * @param words - List of words with repeated words
     * @return - Ordered list of highest repeated words
     */

    public List<String> occurences(Integer K, List<String> words) {
        Map<String, Integer> countMap = new HashMap<String, Integer>();
        
        LinkedList<String> trackedList = new LinkedList<String>(); // smallest to largest count in list
        for(int i=0; i<K; i++) {
            trackedList.add(null);
        }
        countMap.put(null, 0);

        for(String currentWord: words) {
            Integer currentCount = countMap.get(currentWord);
            if(currentCount == null) {
                currentCount = 1;
            } else {
                currentCount++;
            }
            countMap.put(currentWord, currentCount);
            
            ListIterator<String> iterator = trackedList.listIterator();
            while(iterator.hasNext()) { // O(K) to walk through linked list
                String nextIteration = iterator.next();
                
                if(currentCount < countMap.get(nextIteration)) { // If 
                    iterator.previous();
                    break;
                }
                
                if(currentWord.equals(nextIteration)) { // if come across word, remove it
                    iterator.remove(); // O(1) to remove from linked list
                }
            }
            // location of iterator is insertion point
            iterator.add(currentWord); // O(1) to add to linked list
            if(trackedList.size() > K) { // An insertion occurred without a remove
                trackedList.removeFirst(); // O(1) to remove first from linked list
            }
        }
        
        return trackedList;
    }
}