diff options
| -rw-r--r-- | Algorithms/Part-II/1-WordNet/Outcast.java | 36 | ||||
| -rw-r--r-- | Algorithms/Part-II/1-WordNet/SAP.java | 154 | ||||
| -rw-r--r-- | Algorithms/Part-II/1-WordNet/SpecializedBFS.java | 150 | ||||
| -rw-r--r-- | Algorithms/Part-II/1-WordNet/WordNet.java | 155 | ||||
| -rwxr-xr-x | Algorithms/Part-II/1-WordNet/run.sh | 2 | 
5 files changed, 454 insertions, 43 deletions
| diff --git a/Algorithms/Part-II/1-WordNet/Outcast.java b/Algorithms/Part-II/1-WordNet/Outcast.java index 296fd79..af2b6f3 100644 --- a/Algorithms/Part-II/1-WordNet/Outcast.java +++ b/Algorithms/Part-II/1-WordNet/Outcast.java @@ -2,26 +2,50 @@  public class Outcast  { +    private WordNet wordNet; +      // constructor takes a WordNet object      public Outcast(WordNet wordnet)      { +        wordNet = wordnet;      }      // given an array of WordNet nouns, return an outcast      public String outcast(String[] nouns)      { -        return ""; +        int maxTotal = -1; +        String outcast = null; + +        for (int i = 0; i < nouns.length; i++) { +            int total = 0; +            for (int j = 0; j < nouns.length; j++) { +                total = total + wordNet.distance(nouns[i], nouns[j]); +            } + +            if (total > maxTotal) { +                maxTotal = total; +                outcast = nouns[i]; +            } +        } + +        return outcast;      }      // for unit testing of this class (such as the one below)      public static void main(String[] args)      { -        WordNet wordnet = new WordNet(args[0], args[1]); +        WordNet wordnet = new WordNet("./data/synsets.txt", "./data/hypernyms.txt");          Outcast outcast = new Outcast(wordnet); -        for (int t = 2; t < args.length; t++) { -            String[] nouns = In.readStrings(args[t]); -            StdOut.println(args[t] + ": " + outcast.outcast(nouns)); -        } +        String[] nouns; + +        nouns = new In("./data/outcast5.txt").readAllStrings(); +        StdOut.println(outcast.outcast(nouns)); + +        nouns = new In("./data/outcast8.txt").readAllStrings(); +        StdOut.println(outcast.outcast(nouns)); + +        nouns = new In("./data/outcast11.txt").readAllStrings(); +        StdOut.println(outcast.outcast(nouns));      }  } diff --git a/Algorithms/Part-II/1-WordNet/SAP.java b/Algorithms/Part-II/1-WordNet/SAP.java index 864b8d0..264102a 100644 --- a/Algorithms/Part-II/1-WordNet/SAP.java +++ b/Algorithms/Part-II/1-WordNet/SAP.java @@ -3,63 +3,175 @@  public class SAP  {      // data type use space proportional to E + V -      // all methods take time at most proportional to E + V in the worst case +    private static  class SAPResult { +        private int ancestor; +        private int length; +        public SAPResult(int a, int l) { +            ancestor = a; +            length = l; +        } +        public int length() { return length; } +        public int ancestor() { return ancestor; } +    }; + +    private Digraph g; +      // constructor takes a digraph (not necessarily a DAG)      public SAP(Digraph G)      { +        g = new Digraph(G);      }      // length of shortest ancestral path between v and w      // -1 if no such path      public int length(int v, int w)      { -        // throws java.lang.IndexOutOfBoundsException if args not in [0 ; G.V()-1] - -        return -1; +        return ancestorAndLength(v, w).length();      }      // a common ancestor of v and w that participates in a shortest ancestral path      // -1 if no such path      public int ancestor(int v, int w)      { -        // throws java.lang.IndexOutOfBoundsException if args not in [0 ; G.V()-1] - -        return -1; +        return ancestorAndLength(v, w).ancestor();      }      // length of shortest ancestral path between any vertex in v and any vertex in w      // -1 if no such path      public int length(Iterable<Integer> v, Iterable<Integer> w)      { -        // throws java.lang.IndexOutOfBoundsException if args not in [0 ; G.V()-1] - -        return -1; +        return ancestorAndLength(v, w).length();      }      // a common ancestor that participates in shortest ancestral path      // -1 if no such path      public int ancestor(Iterable<Integer> v, Iterable<Integer> w)      { -        // throws java.lang.IndexOutOfBoundsException if args not in [0 ; G.V()-1] +        return ancestorAndLength(v, w).ancestor(); +    } + +    private SAPResult ancestorAndLength(int v, int w) +    { +        if ((Math.max(v, w) >= g.V()) || (Math.min(v, w) < 0)) +            throw new IndexOutOfBoundsException("out of index"); + +        // the smallest index shall have the smallest components count +        int a, b; +        if (v < w) { +            a = v; +            b = w; +        } else { +            a = w; +            b = v; +        } + +        int sol = -1; +        int len = Integer.MAX_VALUE; + +        if (a == b) { +            sol = a; +            len = 0; +        } else { + +            ResizingArrayQueue<Integer> c = new ResizingArrayQueue<Integer>(); +            SpecializedBFS aBFS = new SpecializedBFS(g, a, c); +            SpecializedBFS bBFS = new SpecializedBFS(g, b, null); + +            for (int x : c) { +                if (!bBFS.hasPathTo(x)) continue; +                int l = aBFS.distTo(x) + bBFS.distTo(x); +                if (len > l) { +                    len = l; +                    sol = x; +                } +            } +        } + +        if (sol == -1) len = -1; + +        return new SAPResult(sol, len); +    } + +    private SAPResult ancestorAndLength(Iterable<Integer> vv, Iterable<Integer> ww) +    { +        for (int v : vv) { +            if (v < 0 || v >= g.V()) +                throw new IndexOutOfBoundsException("out of index"); +        } +        for (int w : ww) { +            if (w < 0 || w >= g.V()) +                throw new IndexOutOfBoundsException("out of index"); +        } + +        int sol = -1; +        int len = Integer.MAX_VALUE; + +        ResizingArrayQueue<Integer> c = new ResizingArrayQueue<Integer>(); +        SpecializedBFS aBFS = new SpecializedBFS(g, vv, c); +        SpecializedBFS bBFS = new SpecializedBFS(g, ww, null); + +        for (int x : c) { +            if (!bBFS.hasPathTo(x)) continue; +            int l = aBFS.distTo(x) + bBFS.distTo(x); +            if (len > l) { +                len = l; +                sol = x; +            } +        } + +        if (sol == -1) len = -1; + +        SAPResult res = new SAPResult(sol, len); -        return -1; +        return res;      }      // for unit testing of this class (such as the one below)      public static void main(String[] args)      { -        In in = new In(args[0]); -        Digraph G = new Digraph(in); -        SAP sap = new SAP(G); -        while (!StdIn.isEmpty()) { -            int v = StdIn.readInt(); -            int w = StdIn.readInt(); -            int length   = sap.length(v, w); -            int ancestor = sap.ancestor(v, w); -            StdOut.printf("length = %d, ancestor = %d\n", length, ancestor); +        int length, ancestor; +        In in; +        Digraph G; +        SAP sap; + +        in = new In("./data/digraph1.txt"); +        G = new Digraph(in); +        sap = new SAP(G); + +        length = sap.length(3, 11); +        ancestor = sap.ancestor(3, 11); +        StdOut.printf("length = %d, ancestor = %d\n", length, ancestor); + +        length = sap.length(9, 12); +        ancestor = sap.ancestor(9, 12); +        StdOut.printf("length = %d, ancestor = %d\n", length, ancestor); + +        length = sap.length(7, 2); +        ancestor = sap.ancestor(7, 2); +        StdOut.printf("length = %d, ancestor = %d\n", length, ancestor); + +        length = sap.length(1, 6); +        ancestor = sap.ancestor(1, 6); +        StdOut.printf("length = %d, ancestor = %d\n", length, ancestor); + +        Stopwatch w1 = new Stopwatch(); +        for (int i = 1; i < 7; i++) { +            String g = "./data/digraph"+i+".txt"; +            in = new In(g); +            G = new Digraph(in); +            sap = new SAP(G); +            for (int v = 0; v < G.V(); v++) { +                for (int w = 0; w < G.V(); w++) { +                    int a = sap.ancestor(v, w); +                    int l = sap.length(v, w); +                    // StdOut.printf("%s %d->%d : a:%d l:%d\n", g, v, w, a, l); +                } +            }          } +        double a1 = w1.elapsedTime(); +        StdOut.printf("all solved in : %g \n", a1);      }  } diff --git a/Algorithms/Part-II/1-WordNet/SpecializedBFS.java b/Algorithms/Part-II/1-WordNet/SpecializedBFS.java new file mode 100644 index 0000000..5b4a635 --- /dev/null +++ b/Algorithms/Part-II/1-WordNet/SpecializedBFS.java @@ -0,0 +1,150 @@ +/* vim: set expandtab tabstop=4 shiftwidth=4 : */ + +public class SpecializedBFS +{ +    private static final int MAXINT = Integer.MAX_VALUE; + +    private boolean[] marked; +    private int[] edgeTo; +    private int[] distTo; + +    public SpecializedBFS(Digraph G, int s, ResizingArrayQueue<Integer> c) +    { +        init(G); +        Queue<Integer> q = new Queue<Integer>(); +        marked[s] = true; +        distTo[s] = 0; +        q.enqueue(s); +        if (c != null) c.enqueue(s); +        bfs(G, q, c); +    } + +    public SpecializedBFS(Digraph G, Iterable<Integer> ss, +            ResizingArrayQueue<Integer> c) +    { +        init(G); +        Queue<Integer> q = new Queue<Integer>(); +        for (int s : ss) { +            marked[s] = true; +            distTo[s] = 0; +            q.enqueue(s); +            if (c != null) c.enqueue(s); +        } +        bfs(G, q, c); +    } + +    private void init(Digraph g) +    { +        marked = new boolean[g.V()]; +        edgeTo = new int[g.V()]; +        distTo = new int[g.V()]; +        for (int v = 0; v < g.V(); v++) +            distTo[v] = MAXINT; +    } + +    public int distTo(int v) +    { +        return distTo[v]; +    } + +    public boolean hasPathTo(int v) +    { +        return marked[v]; +    } + +    private void bfs(Digraph G, Queue<Integer> q, ResizingArrayQueue<Integer> c) +    { +        while (!q.isEmpty()) { +            int v = q.dequeue(); +            for (int w : G.adj(v)) { +                if (!marked[w]) { +                    edgeTo[w] = v; +                    distTo[w] = distTo[v] + 1; +                    marked[w] = true; +                    q.enqueue(w); +                    if (c != null) c.enqueue(w); +                } +            } +        } +    } + +    // change this to an iterator if efficiency is needed +    public Iterable<Integer> pathTo(int v) +    { +        if (!hasPathTo(v)) return null; + +        Stack<Integer> path = new Stack<Integer>(); +        int x; +        for (x = v; distTo[x] != 0; x = edgeTo[x]) +            path.push(x); +        path.push(x); + +        return path; +    } + +    public int pathToCount(int v) +    { +        if (!hasPathTo(v)) return MAXINT; + +        int count = 0; +        for (int x = v; distTo[x] != 0; x = edgeTo[x]) +            count++; + +        return count; +    } + +    public static void main(String[] args) +    { +        In in = new In("./data/digraph3.txt"); +        Digraph G = new Digraph(in); +        StdOut.println(G); + +        int s = 7; +        ResizingArrayQueue<Integer> components = new ResizingArrayQueue<Integer>(); +        SpecializedBFS bfs = new SpecializedBFS(G, s, components); + +        for (int v = 0; v < G.V(); v++) { +            if (bfs.hasPathTo(v)) { +                StdOut.printf("%2d -> %2d: [%d]  ", s, v, bfs.distTo(v)); +                for (int x : bfs.pathTo(v)) { +                    if (x == s) StdOut.print(x); +                    else        StdOut.print("->" + x); +                } +                StdOut.println(); +            } else { +                StdOut.printf("%2d -> %2d: [-]\n", s, v); +            } +        } +        StdOut.printf("components : "); +        for (int c : components) { +            if (c == s) StdOut.print(c); +            else        StdOut.print("->" + c); +        } +        StdOut.print("\n"); +        StdOut.printf("pathToCount(%d) : %2d\n", 4, bfs.pathToCount(4)); +        StdOut.printf("pathToCount(%d) : %2d\n", 9, bfs.pathToCount(9)); + +        int s2 = 11; +        SpecializedBFS bfs2 = new SpecializedBFS(G, s2, null); + +        for (int v = 0; v < G.V(); v++) { +            if (bfs2.hasPathTo(v)) { +                StdOut.printf("%2d -> %2d: [%d]  ", s2, v, bfs2.distTo(v)); +                for (int x : bfs2.pathTo(v)) { +                    if (x == s2) StdOut.print(x); +                    else        StdOut.print("->" + x); +                } +                StdOut.println(); +            } else { +                StdOut.printf("%2d -> %2d: [-]\n", s2, v); +            } +        } + +        StdOut.printf("intersection : "); +        for (int c : components) { +            if (bfs2.hasPathTo(c)) +                    StdOut.print(c+" "); +        } +        StdOut.print("\n"); +    } +} diff --git a/Algorithms/Part-II/1-WordNet/WordNet.java b/Algorithms/Part-II/1-WordNet/WordNet.java index 830ccdf..2a854fa 100644 --- a/Algorithms/Part-II/1-WordNet/WordNet.java +++ b/Algorithms/Part-II/1-WordNet/WordNet.java @@ -1,58 +1,183 @@  /* vim: set expandtab tabstop=4 shiftwidth=4 : */ +import java.util.HashMap; +import java.util.Iterator; +  public class WordNet  {      // data type space linear in the input size +    private HashMap<String, Bag<Integer>> wordsToIds; +    private HashMap<Integer, String> idToString; +    private Digraph dag; + +    private SAP sap;      // constructor takes the name of the two input files +    // time linearithmic in the input size      public WordNet(String synsets, String hypernyms)      { -        // time linearithmic in the input size +        int size = 0; +        wordsToIds = new HashMap<String, Bag<Integer>>(); +        idToString = new HashMap<Integer, String>(); + +        // load words from synsets +        In syn = new In(synsets); +        while (syn.hasNextLine()) { +            String line = syn.readLine(); +            String[] fields = line.split(","); +            int id = Integer.parseInt(fields[0]); +            String[] words = fields[1].split(" "); +            for (String word : words) { +                Bag<Integer> bag = wordsToIds.get(word); +                if (bag == null) { +                    bag = new Bag<Integer>(); +                    bag.add(id); +                    wordsToIds.put(word, bag); +                } else { +                    bag.add(id); +                } +            } +            idToString.put(id, fields[1]); +            size++; +        } +        syn.close(); + +        dag = new Digraph(size); + +        boolean[] roots = new boolean[dag.V()]; +        for (int v = 0; v < dag.V(); v++) +            roots[v] = true; -        // throw a java.lang.IllegalArgumentException -        // if the input does not correspond to a rooted DAG +        // load edges from hypernyms +        In hyp = new In(hypernyms); +        while (hyp.hasNextLine()) { +            String line = hyp.readLine(); +            String[] fields = line.split(","); +            int id = Integer.parseInt(fields[0]); +            if (fields.length > 1) roots[id] = false; +            for (int i = 1; i < fields.length; i++) { +                dag.addEdge(id, Integer.parseInt(fields[i])); +            } +        } +        hyp.close(); + +        int rootsCount = 0; +        for (int v = 0; v < dag.V(); v++) +            if (roots[v]) rootsCount++; +        if (rootsCount != 1) +                throw new IllegalArgumentException("roots "+rootsCount); + +        checkIndegrees(); + +        sap = new SAP(dag); +    } + +    private void checkIndegrees() +    { +        int[] indegree = new int[dag.V()]; + +        // compute indegrees +        for (int v = 0; v < dag.V(); v++) { +            for (int w : dag.adj(v)) { +                indegree[w]++; +            } +        } + +        // initialize queue to contain all vertices with indegree = 0 (leaves) +        Queue<Integer> queue = new Queue<Integer>(); +        for (int v = 0; v < dag.V(); v++) +            if (indegree[v] == 0) queue.enqueue(v); + +        // BFS and decrease indegrees +        for (int j = 0; !queue.isEmpty(); j++) { +            int v = queue.dequeue(); +            for (int w : dag.adj(v)) { +                indegree[w]--; +                if (indegree[w] == 0) queue.enqueue(w); +            } +        } + +        // check that all indegrees are 0 +        for (int v = 0; v < indegree.length; v++) { +            if (indegree[v] != 0) +                throw new IllegalArgumentException("indegrees"); +        }      }      // the set of nouns (no duplicates), returned as an Iterable      public Iterable<String> nouns()      { -        return null; +        if (wordsToIds == null) return null; +        return wordsToIds.keySet();      }      // is the word a WordNet noun?      public boolean isNoun(String word)      { -        // run in time logarithmic in the number of nouns - -        return false; +        if (wordsToIds == null) return false; +        return wordsToIds.containsKey(word);      }      // distance between nounA and nounB (defined below)      public int distance(String nounA, String nounB)      { -        // run in time linear in the size of the WordNet digraph +        if (!isNoun(nounA) || !isNoun(nounB)) +            throw new IllegalArgumentException(); + +        Iterable<Integer> it0 = nounIterable(nounA); +        Iterable<Integer> it1 = nounIterable(nounB); -        // throw java.lang.IllegalArgumentException -        // unless both of the noun arguments are WordNet nouns +        return sap.length(it0, it1); +    } -        return 0; +    private Iterable<Integer> nounIterable(final String noun) { +        return new Iterable<Integer>() { +            public Iterator<Integer> iterator() { +                return wordsToIds.get(noun).iterator(); +            } +        };      }      // a synset that is the common ancestor of nounA and nounB      // in a shortest ancestral path      public String sap(String nounA, String nounB)      { -        // run in time linear in the size of the WordNet digraph +        if (!isNoun(nounA) || !isNoun(nounB)) +            throw new java.lang.IllegalArgumentException(); -        // throw java.lang.IllegalArgumentException -        // unless both of the noun arguments are WordNet nouns +        Iterable<Integer> it0 = nounIterable(nounA); +        Iterable<Integer> it1 = nounIterable(nounB); -        return ""; +        return idToString.get(sap.ancestor(it0, it1));      }      // for unit testing of this class      public static void main(String[] args)      { +        WordNet wordNet = new WordNet("./data/synsets.txt", "./data/hypernyms.txt"); + +        StdOut.println("23 white_marlin, mileage: " +                + wordNet.distance("white_marlin", "mileage")); +        StdOut.println("32 Black_Plague, black_marlin: " +                + wordNet.distance("Black_Plague", "black_marlin")); +        StdOut.println("32 American_water_spaniel, histology: " +                + wordNet.distance("American_water_spaniel", "histology")); +        StdOut.println("32 Brown_Swiss, barrel_roll: " +                + wordNet.distance("Brown_Swiss", "barrel_roll")); +        StdOut.println("bedspring Carolus_Linnaeus: " +                + wordNet.sap("bedspring", "Carolus_Linnaeus")); +        try { +            StdOut.println("check roots"); +            new WordNet("./data/synsets.txt", "./data/hypernymsInvalidTwoRoots.txt"); +            StdOut.println("BAD"); +        } +        catch (IllegalArgumentException e) { StdOut.println(e.getMessage()); } +        try { +            StdOut.println("check cycles"); +            new WordNet("./data/synsets.txt", "./data/hypernymsInvalidCycle.txt"); +            StdOut.println("BAD"); +        } +        catch (IllegalArgumentException e) { StdOut.println(e.getMessage()); }      }  } diff --git a/Algorithms/Part-II/1-WordNet/run.sh b/Algorithms/Part-II/1-WordNet/run.sh index cd95d44..8c01871 100755 --- a/Algorithms/Part-II/1-WordNet/run.sh +++ b/Algorithms/Part-II/1-WordNet/run.sh @@ -2,7 +2,7 @@  export "CLASSPATH=$CLASSPATH:.:$HOME/algs4/algs4.jar:$HOME/algs4/stdlib.jar" -CLASSES="WordNet SAP Outcast" +CLASSES="SpecializedBFS WordNet SAP Outcast"  rm *.class *.zip 2>/dev/null | 
