diff options
author | Jérémy Zurcher <jeremy@asynk.ch> | 2013-11-21 18:18:55 +0100 |
---|---|---|
committer | Jérémy Zurcher <jeremy@asynk.ch> | 2013-12-12 14:11:14 +0100 |
commit | a60ba769f451259caba14886fdb206593b92df76 (patch) | |
tree | 1c633722971e79bdbadecb4c1da3b677592c7d84 /Algorithms/Part-II | |
parent | 4020858e2a7ca3557d6ddfa40ceec0a191ef2605 (diff) | |
download | coursera-a60ba769f451259caba14886fdb206593b92df76.zip coursera-a60ba769f451259caba14886fdb206593b92df76.tar.gz |
Algorithms-II : 1-WordNet: implementation
Diffstat (limited to 'Algorithms/Part-II')
-rw-r--r-- | Algorithms/Part-II/1-WordNet/Outcast.java | 36 | ||||
-rw-r--r-- | Algorithms/Part-II/1-WordNet/SAP.java | 154 | ||||
-rw-r--r-- | Algorithms/Part-II/1-WordNet/SpecializedBFS.java | 150 | ||||
-rw-r--r-- | Algorithms/Part-II/1-WordNet/WordNet.java | 155 | ||||
-rwxr-xr-x | Algorithms/Part-II/1-WordNet/run.sh | 2 |
5 files changed, 454 insertions, 43 deletions
diff --git a/Algorithms/Part-II/1-WordNet/Outcast.java b/Algorithms/Part-II/1-WordNet/Outcast.java index 296fd79..af2b6f3 100644 --- a/Algorithms/Part-II/1-WordNet/Outcast.java +++ b/Algorithms/Part-II/1-WordNet/Outcast.java @@ -2,26 +2,50 @@ public class Outcast { + private WordNet wordNet; + // constructor takes a WordNet object public Outcast(WordNet wordnet) { + wordNet = wordnet; } // given an array of WordNet nouns, return an outcast public String outcast(String[] nouns) { - return ""; + int maxTotal = -1; + String outcast = null; + + for (int i = 0; i < nouns.length; i++) { + int total = 0; + for (int j = 0; j < nouns.length; j++) { + total = total + wordNet.distance(nouns[i], nouns[j]); + } + + if (total > maxTotal) { + maxTotal = total; + outcast = nouns[i]; + } + } + + return outcast; } // for unit testing of this class (such as the one below) public static void main(String[] args) { - WordNet wordnet = new WordNet(args[0], args[1]); + WordNet wordnet = new WordNet("./data/synsets.txt", "./data/hypernyms.txt"); Outcast outcast = new Outcast(wordnet); - for (int t = 2; t < args.length; t++) { - String[] nouns = In.readStrings(args[t]); - StdOut.println(args[t] + ": " + outcast.outcast(nouns)); - } + String[] nouns; + + nouns = new In("./data/outcast5.txt").readAllStrings(); + StdOut.println(outcast.outcast(nouns)); + + nouns = new In("./data/outcast8.txt").readAllStrings(); + StdOut.println(outcast.outcast(nouns)); + + nouns = new In("./data/outcast11.txt").readAllStrings(); + StdOut.println(outcast.outcast(nouns)); } } diff --git a/Algorithms/Part-II/1-WordNet/SAP.java b/Algorithms/Part-II/1-WordNet/SAP.java index 864b8d0..264102a 100644 --- a/Algorithms/Part-II/1-WordNet/SAP.java +++ b/Algorithms/Part-II/1-WordNet/SAP.java @@ -3,63 +3,175 @@ public class SAP { // data type use space proportional to E + V - // all methods take time at most proportional to E + V in the worst case + private static class SAPResult { + private int ancestor; + private int length; + public SAPResult(int a, int l) { + ancestor = a; + length = l; + } + public int length() { return length; } + public int ancestor() { return ancestor; } + }; + + private Digraph g; + // constructor takes a digraph (not necessarily a DAG) public SAP(Digraph G) { + g = new Digraph(G); } // length of shortest ancestral path between v and w // -1 if no such path public int length(int v, int w) { - // throws java.lang.IndexOutOfBoundsException if args not in [0 ; G.V()-1] - - return -1; + return ancestorAndLength(v, w).length(); } // a common ancestor of v and w that participates in a shortest ancestral path // -1 if no such path public int ancestor(int v, int w) { - // throws java.lang.IndexOutOfBoundsException if args not in [0 ; G.V()-1] - - return -1; + return ancestorAndLength(v, w).ancestor(); } // length of shortest ancestral path between any vertex in v and any vertex in w // -1 if no such path public int length(Iterable<Integer> v, Iterable<Integer> w) { - // throws java.lang.IndexOutOfBoundsException if args not in [0 ; G.V()-1] - - return -1; + return ancestorAndLength(v, w).length(); } // a common ancestor that participates in shortest ancestral path // -1 if no such path public int ancestor(Iterable<Integer> v, Iterable<Integer> w) { - // throws java.lang.IndexOutOfBoundsException if args not in [0 ; G.V()-1] + return ancestorAndLength(v, w).ancestor(); + } + + private SAPResult ancestorAndLength(int v, int w) + { + if ((Math.max(v, w) >= g.V()) || (Math.min(v, w) < 0)) + throw new IndexOutOfBoundsException("out of index"); + + // the smallest index shall have the smallest components count + int a, b; + if (v < w) { + a = v; + b = w; + } else { + a = w; + b = v; + } + + int sol = -1; + int len = Integer.MAX_VALUE; + + if (a == b) { + sol = a; + len = 0; + } else { + + ResizingArrayQueue<Integer> c = new ResizingArrayQueue<Integer>(); + SpecializedBFS aBFS = new SpecializedBFS(g, a, c); + SpecializedBFS bBFS = new SpecializedBFS(g, b, null); + + for (int x : c) { + if (!bBFS.hasPathTo(x)) continue; + int l = aBFS.distTo(x) + bBFS.distTo(x); + if (len > l) { + len = l; + sol = x; + } + } + } + + if (sol == -1) len = -1; + + return new SAPResult(sol, len); + } + + private SAPResult ancestorAndLength(Iterable<Integer> vv, Iterable<Integer> ww) + { + for (int v : vv) { + if (v < 0 || v >= g.V()) + throw new IndexOutOfBoundsException("out of index"); + } + for (int w : ww) { + if (w < 0 || w >= g.V()) + throw new IndexOutOfBoundsException("out of index"); + } + + int sol = -1; + int len = Integer.MAX_VALUE; + + ResizingArrayQueue<Integer> c = new ResizingArrayQueue<Integer>(); + SpecializedBFS aBFS = new SpecializedBFS(g, vv, c); + SpecializedBFS bBFS = new SpecializedBFS(g, ww, null); + + for (int x : c) { + if (!bBFS.hasPathTo(x)) continue; + int l = aBFS.distTo(x) + bBFS.distTo(x); + if (len > l) { + len = l; + sol = x; + } + } + + if (sol == -1) len = -1; + + SAPResult res = new SAPResult(sol, len); - return -1; + return res; } // for unit testing of this class (such as the one below) public static void main(String[] args) { - In in = new In(args[0]); - Digraph G = new Digraph(in); - SAP sap = new SAP(G); - while (!StdIn.isEmpty()) { - int v = StdIn.readInt(); - int w = StdIn.readInt(); - int length = sap.length(v, w); - int ancestor = sap.ancestor(v, w); - StdOut.printf("length = %d, ancestor = %d\n", length, ancestor); + int length, ancestor; + In in; + Digraph G; + SAP sap; + + in = new In("./data/digraph1.txt"); + G = new Digraph(in); + sap = new SAP(G); + + length = sap.length(3, 11); + ancestor = sap.ancestor(3, 11); + StdOut.printf("length = %d, ancestor = %d\n", length, ancestor); + + length = sap.length(9, 12); + ancestor = sap.ancestor(9, 12); + StdOut.printf("length = %d, ancestor = %d\n", length, ancestor); + + length = sap.length(7, 2); + ancestor = sap.ancestor(7, 2); + StdOut.printf("length = %d, ancestor = %d\n", length, ancestor); + + length = sap.length(1, 6); + ancestor = sap.ancestor(1, 6); + StdOut.printf("length = %d, ancestor = %d\n", length, ancestor); + + Stopwatch w1 = new Stopwatch(); + for (int i = 1; i < 7; i++) { + String g = "./data/digraph"+i+".txt"; + in = new In(g); + G = new Digraph(in); + sap = new SAP(G); + for (int v = 0; v < G.V(); v++) { + for (int w = 0; w < G.V(); w++) { + int a = sap.ancestor(v, w); + int l = sap.length(v, w); + // StdOut.printf("%s %d->%d : a:%d l:%d\n", g, v, w, a, l); + } + } } + double a1 = w1.elapsedTime(); + StdOut.printf("all solved in : %g \n", a1); } } diff --git a/Algorithms/Part-II/1-WordNet/SpecializedBFS.java b/Algorithms/Part-II/1-WordNet/SpecializedBFS.java new file mode 100644 index 0000000..5b4a635 --- /dev/null +++ b/Algorithms/Part-II/1-WordNet/SpecializedBFS.java @@ -0,0 +1,150 @@ +/* vim: set expandtab tabstop=4 shiftwidth=4 : */ + +public class SpecializedBFS +{ + private static final int MAXINT = Integer.MAX_VALUE; + + private boolean[] marked; + private int[] edgeTo; + private int[] distTo; + + public SpecializedBFS(Digraph G, int s, ResizingArrayQueue<Integer> c) + { + init(G); + Queue<Integer> q = new Queue<Integer>(); + marked[s] = true; + distTo[s] = 0; + q.enqueue(s); + if (c != null) c.enqueue(s); + bfs(G, q, c); + } + + public SpecializedBFS(Digraph G, Iterable<Integer> ss, + ResizingArrayQueue<Integer> c) + { + init(G); + Queue<Integer> q = new Queue<Integer>(); + for (int s : ss) { + marked[s] = true; + distTo[s] = 0; + q.enqueue(s); + if (c != null) c.enqueue(s); + } + bfs(G, q, c); + } + + private void init(Digraph g) + { + marked = new boolean[g.V()]; + edgeTo = new int[g.V()]; + distTo = new int[g.V()]; + for (int v = 0; v < g.V(); v++) + distTo[v] = MAXINT; + } + + public int distTo(int v) + { + return distTo[v]; + } + + public boolean hasPathTo(int v) + { + return marked[v]; + } + + private void bfs(Digraph G, Queue<Integer> q, ResizingArrayQueue<Integer> c) + { + while (!q.isEmpty()) { + int v = q.dequeue(); + for (int w : G.adj(v)) { + if (!marked[w]) { + edgeTo[w] = v; + distTo[w] = distTo[v] + 1; + marked[w] = true; + q.enqueue(w); + if (c != null) c.enqueue(w); + } + } + } + } + + // change this to an iterator if efficiency is needed + public Iterable<Integer> pathTo(int v) + { + if (!hasPathTo(v)) return null; + + Stack<Integer> path = new Stack<Integer>(); + int x; + for (x = v; distTo[x] != 0; x = edgeTo[x]) + path.push(x); + path.push(x); + + return path; + } + + public int pathToCount(int v) + { + if (!hasPathTo(v)) return MAXINT; + + int count = 0; + for (int x = v; distTo[x] != 0; x = edgeTo[x]) + count++; + + return count; + } + + public static void main(String[] args) + { + In in = new In("./data/digraph3.txt"); + Digraph G = new Digraph(in); + StdOut.println(G); + + int s = 7; + ResizingArrayQueue<Integer> components = new ResizingArrayQueue<Integer>(); + SpecializedBFS bfs = new SpecializedBFS(G, s, components); + + for (int v = 0; v < G.V(); v++) { + if (bfs.hasPathTo(v)) { + StdOut.printf("%2d -> %2d: [%d] ", s, v, bfs.distTo(v)); + for (int x : bfs.pathTo(v)) { + if (x == s) StdOut.print(x); + else StdOut.print("->" + x); + } + StdOut.println(); + } else { + StdOut.printf("%2d -> %2d: [-]\n", s, v); + } + } + StdOut.printf("components : "); + for (int c : components) { + if (c == s) StdOut.print(c); + else StdOut.print("->" + c); + } + StdOut.print("\n"); + StdOut.printf("pathToCount(%d) : %2d\n", 4, bfs.pathToCount(4)); + StdOut.printf("pathToCount(%d) : %2d\n", 9, bfs.pathToCount(9)); + + int s2 = 11; + SpecializedBFS bfs2 = new SpecializedBFS(G, s2, null); + + for (int v = 0; v < G.V(); v++) { + if (bfs2.hasPathTo(v)) { + StdOut.printf("%2d -> %2d: [%d] ", s2, v, bfs2.distTo(v)); + for (int x : bfs2.pathTo(v)) { + if (x == s2) StdOut.print(x); + else StdOut.print("->" + x); + } + StdOut.println(); + } else { + StdOut.printf("%2d -> %2d: [-]\n", s2, v); + } + } + + StdOut.printf("intersection : "); + for (int c : components) { + if (bfs2.hasPathTo(c)) + StdOut.print(c+" "); + } + StdOut.print("\n"); + } +} diff --git a/Algorithms/Part-II/1-WordNet/WordNet.java b/Algorithms/Part-II/1-WordNet/WordNet.java index 830ccdf..2a854fa 100644 --- a/Algorithms/Part-II/1-WordNet/WordNet.java +++ b/Algorithms/Part-II/1-WordNet/WordNet.java @@ -1,58 +1,183 @@ /* vim: set expandtab tabstop=4 shiftwidth=4 : */ +import java.util.HashMap; +import java.util.Iterator; + public class WordNet { // data type space linear in the input size + private HashMap<String, Bag<Integer>> wordsToIds; + private HashMap<Integer, String> idToString; + private Digraph dag; + + private SAP sap; // constructor takes the name of the two input files + // time linearithmic in the input size public WordNet(String synsets, String hypernyms) { - // time linearithmic in the input size + int size = 0; + wordsToIds = new HashMap<String, Bag<Integer>>(); + idToString = new HashMap<Integer, String>(); + + // load words from synsets + In syn = new In(synsets); + while (syn.hasNextLine()) { + String line = syn.readLine(); + String[] fields = line.split(","); + int id = Integer.parseInt(fields[0]); + String[] words = fields[1].split(" "); + for (String word : words) { + Bag<Integer> bag = wordsToIds.get(word); + if (bag == null) { + bag = new Bag<Integer>(); + bag.add(id); + wordsToIds.put(word, bag); + } else { + bag.add(id); + } + } + idToString.put(id, fields[1]); + size++; + } + syn.close(); + + dag = new Digraph(size); + + boolean[] roots = new boolean[dag.V()]; + for (int v = 0; v < dag.V(); v++) + roots[v] = true; - // throw a java.lang.IllegalArgumentException - // if the input does not correspond to a rooted DAG + // load edges from hypernyms + In hyp = new In(hypernyms); + while (hyp.hasNextLine()) { + String line = hyp.readLine(); + String[] fields = line.split(","); + int id = Integer.parseInt(fields[0]); + if (fields.length > 1) roots[id] = false; + for (int i = 1; i < fields.length; i++) { + dag.addEdge(id, Integer.parseInt(fields[i])); + } + } + hyp.close(); + + int rootsCount = 0; + for (int v = 0; v < dag.V(); v++) + if (roots[v]) rootsCount++; + if (rootsCount != 1) + throw new IllegalArgumentException("roots "+rootsCount); + + checkIndegrees(); + + sap = new SAP(dag); + } + + private void checkIndegrees() + { + int[] indegree = new int[dag.V()]; + + // compute indegrees + for (int v = 0; v < dag.V(); v++) { + for (int w : dag.adj(v)) { + indegree[w]++; + } + } + + // initialize queue to contain all vertices with indegree = 0 (leaves) + Queue<Integer> queue = new Queue<Integer>(); + for (int v = 0; v < dag.V(); v++) + if (indegree[v] == 0) queue.enqueue(v); + + // BFS and decrease indegrees + for (int j = 0; !queue.isEmpty(); j++) { + int v = queue.dequeue(); + for (int w : dag.adj(v)) { + indegree[w]--; + if (indegree[w] == 0) queue.enqueue(w); + } + } + + // check that all indegrees are 0 + for (int v = 0; v < indegree.length; v++) { + if (indegree[v] != 0) + throw new IllegalArgumentException("indegrees"); + } } // the set of nouns (no duplicates), returned as an Iterable public Iterable<String> nouns() { - return null; + if (wordsToIds == null) return null; + return wordsToIds.keySet(); } // is the word a WordNet noun? public boolean isNoun(String word) { - // run in time logarithmic in the number of nouns - - return false; + if (wordsToIds == null) return false; + return wordsToIds.containsKey(word); } // distance between nounA and nounB (defined below) public int distance(String nounA, String nounB) { - // run in time linear in the size of the WordNet digraph + if (!isNoun(nounA) || !isNoun(nounB)) + throw new IllegalArgumentException(); + + Iterable<Integer> it0 = nounIterable(nounA); + Iterable<Integer> it1 = nounIterable(nounB); - // throw java.lang.IllegalArgumentException - // unless both of the noun arguments are WordNet nouns + return sap.length(it0, it1); + } - return 0; + private Iterable<Integer> nounIterable(final String noun) { + return new Iterable<Integer>() { + public Iterator<Integer> iterator() { + return wordsToIds.get(noun).iterator(); + } + }; } // a synset that is the common ancestor of nounA and nounB // in a shortest ancestral path public String sap(String nounA, String nounB) { - // run in time linear in the size of the WordNet digraph + if (!isNoun(nounA) || !isNoun(nounB)) + throw new java.lang.IllegalArgumentException(); - // throw java.lang.IllegalArgumentException - // unless both of the noun arguments are WordNet nouns + Iterable<Integer> it0 = nounIterable(nounA); + Iterable<Integer> it1 = nounIterable(nounB); - return ""; + return idToString.get(sap.ancestor(it0, it1)); } // for unit testing of this class public static void main(String[] args) { + WordNet wordNet = new WordNet("./data/synsets.txt", "./data/hypernyms.txt"); + + StdOut.println("23 white_marlin, mileage: " + + wordNet.distance("white_marlin", "mileage")); + StdOut.println("32 Black_Plague, black_marlin: " + + wordNet.distance("Black_Plague", "black_marlin")); + StdOut.println("32 American_water_spaniel, histology: " + + wordNet.distance("American_water_spaniel", "histology")); + StdOut.println("32 Brown_Swiss, barrel_roll: " + + wordNet.distance("Brown_Swiss", "barrel_roll")); + StdOut.println("bedspring Carolus_Linnaeus: " + + wordNet.sap("bedspring", "Carolus_Linnaeus")); + try { + StdOut.println("check roots"); + new WordNet("./data/synsets.txt", "./data/hypernymsInvalidTwoRoots.txt"); + StdOut.println("BAD"); + } + catch (IllegalArgumentException e) { StdOut.println(e.getMessage()); } + try { + StdOut.println("check cycles"); + new WordNet("./data/synsets.txt", "./data/hypernymsInvalidCycle.txt"); + StdOut.println("BAD"); + } + catch (IllegalArgumentException e) { StdOut.println(e.getMessage()); } } } diff --git a/Algorithms/Part-II/1-WordNet/run.sh b/Algorithms/Part-II/1-WordNet/run.sh index cd95d44..8c01871 100755 --- a/Algorithms/Part-II/1-WordNet/run.sh +++ b/Algorithms/Part-II/1-WordNet/run.sh @@ -2,7 +2,7 @@ export "CLASSPATH=$CLASSPATH:.:$HOME/algs4/algs4.jar:$HOME/algs4/stdlib.jar" -CLASSES="WordNet SAP Outcast" +CLASSES="SpecializedBFS WordNet SAP Outcast" rm *.class *.zip 2>/dev/null |