summaryrefslogtreecommitdiffstats
path: root/Algorithms/Part-II/1-WordNet
diff options
context:
space:
mode:
authorJérémy Zurcher <jeremy@asynk.ch>2013-11-21 18:18:55 +0100
committerJérémy Zurcher <jeremy@asynk.ch>2013-12-12 14:11:14 +0100
commita60ba769f451259caba14886fdb206593b92df76 (patch)
tree1c633722971e79bdbadecb4c1da3b677592c7d84 /Algorithms/Part-II/1-WordNet
parent4020858e2a7ca3557d6ddfa40ceec0a191ef2605 (diff)
downloadcoursera-a60ba769f451259caba14886fdb206593b92df76.zip
coursera-a60ba769f451259caba14886fdb206593b92df76.tar.gz
Algorithms-II : 1-WordNet: implementation
Diffstat (limited to 'Algorithms/Part-II/1-WordNet')
-rw-r--r--Algorithms/Part-II/1-WordNet/Outcast.java36
-rw-r--r--Algorithms/Part-II/1-WordNet/SAP.java154
-rw-r--r--Algorithms/Part-II/1-WordNet/SpecializedBFS.java150
-rw-r--r--Algorithms/Part-II/1-WordNet/WordNet.java155
-rwxr-xr-xAlgorithms/Part-II/1-WordNet/run.sh2
5 files changed, 454 insertions, 43 deletions
diff --git a/Algorithms/Part-II/1-WordNet/Outcast.java b/Algorithms/Part-II/1-WordNet/Outcast.java
index 296fd79..af2b6f3 100644
--- a/Algorithms/Part-II/1-WordNet/Outcast.java
+++ b/Algorithms/Part-II/1-WordNet/Outcast.java
@@ -2,26 +2,50 @@
public class Outcast
{
+ private WordNet wordNet;
+
// constructor takes a WordNet object
public Outcast(WordNet wordnet)
{
+ wordNet = wordnet;
}
// given an array of WordNet nouns, return an outcast
public String outcast(String[] nouns)
{
- return "";
+ int maxTotal = -1;
+ String outcast = null;
+
+ for (int i = 0; i < nouns.length; i++) {
+ int total = 0;
+ for (int j = 0; j < nouns.length; j++) {
+ total = total + wordNet.distance(nouns[i], nouns[j]);
+ }
+
+ if (total > maxTotal) {
+ maxTotal = total;
+ outcast = nouns[i];
+ }
+ }
+
+ return outcast;
}
// for unit testing of this class (such as the one below)
public static void main(String[] args)
{
- WordNet wordnet = new WordNet(args[0], args[1]);
+ WordNet wordnet = new WordNet("./data/synsets.txt", "./data/hypernyms.txt");
Outcast outcast = new Outcast(wordnet);
- for (int t = 2; t < args.length; t++) {
- String[] nouns = In.readStrings(args[t]);
- StdOut.println(args[t] + ": " + outcast.outcast(nouns));
- }
+ String[] nouns;
+
+ nouns = new In("./data/outcast5.txt").readAllStrings();
+ StdOut.println(outcast.outcast(nouns));
+
+ nouns = new In("./data/outcast8.txt").readAllStrings();
+ StdOut.println(outcast.outcast(nouns));
+
+ nouns = new In("./data/outcast11.txt").readAllStrings();
+ StdOut.println(outcast.outcast(nouns));
}
}
diff --git a/Algorithms/Part-II/1-WordNet/SAP.java b/Algorithms/Part-II/1-WordNet/SAP.java
index 864b8d0..264102a 100644
--- a/Algorithms/Part-II/1-WordNet/SAP.java
+++ b/Algorithms/Part-II/1-WordNet/SAP.java
@@ -3,63 +3,175 @@
public class SAP
{
// data type use space proportional to E + V
-
// all methods take time at most proportional to E + V in the worst case
+ private static class SAPResult {
+ private int ancestor;
+ private int length;
+ public SAPResult(int a, int l) {
+ ancestor = a;
+ length = l;
+ }
+ public int length() { return length; }
+ public int ancestor() { return ancestor; }
+ };
+
+ private Digraph g;
+
// constructor takes a digraph (not necessarily a DAG)
public SAP(Digraph G)
{
+ g = new Digraph(G);
}
// length of shortest ancestral path between v and w
// -1 if no such path
public int length(int v, int w)
{
- // throws java.lang.IndexOutOfBoundsException if args not in [0 ; G.V()-1]
-
- return -1;
+ return ancestorAndLength(v, w).length();
}
// a common ancestor of v and w that participates in a shortest ancestral path
// -1 if no such path
public int ancestor(int v, int w)
{
- // throws java.lang.IndexOutOfBoundsException if args not in [0 ; G.V()-1]
-
- return -1;
+ return ancestorAndLength(v, w).ancestor();
}
// length of shortest ancestral path between any vertex in v and any vertex in w
// -1 if no such path
public int length(Iterable<Integer> v, Iterable<Integer> w)
{
- // throws java.lang.IndexOutOfBoundsException if args not in [0 ; G.V()-1]
-
- return -1;
+ return ancestorAndLength(v, w).length();
}
// a common ancestor that participates in shortest ancestral path
// -1 if no such path
public int ancestor(Iterable<Integer> v, Iterable<Integer> w)
{
- // throws java.lang.IndexOutOfBoundsException if args not in [0 ; G.V()-1]
+ return ancestorAndLength(v, w).ancestor();
+ }
+
+ private SAPResult ancestorAndLength(int v, int w)
+ {
+ if ((Math.max(v, w) >= g.V()) || (Math.min(v, w) < 0))
+ throw new IndexOutOfBoundsException("out of index");
+
+ // the smallest index shall have the smallest components count
+ int a, b;
+ if (v < w) {
+ a = v;
+ b = w;
+ } else {
+ a = w;
+ b = v;
+ }
+
+ int sol = -1;
+ int len = Integer.MAX_VALUE;
+
+ if (a == b) {
+ sol = a;
+ len = 0;
+ } else {
+
+ ResizingArrayQueue<Integer> c = new ResizingArrayQueue<Integer>();
+ SpecializedBFS aBFS = new SpecializedBFS(g, a, c);
+ SpecializedBFS bBFS = new SpecializedBFS(g, b, null);
+
+ for (int x : c) {
+ if (!bBFS.hasPathTo(x)) continue;
+ int l = aBFS.distTo(x) + bBFS.distTo(x);
+ if (len > l) {
+ len = l;
+ sol = x;
+ }
+ }
+ }
+
+ if (sol == -1) len = -1;
+
+ return new SAPResult(sol, len);
+ }
+
+ private SAPResult ancestorAndLength(Iterable<Integer> vv, Iterable<Integer> ww)
+ {
+ for (int v : vv) {
+ if (v < 0 || v >= g.V())
+ throw new IndexOutOfBoundsException("out of index");
+ }
+ for (int w : ww) {
+ if (w < 0 || w >= g.V())
+ throw new IndexOutOfBoundsException("out of index");
+ }
+
+ int sol = -1;
+ int len = Integer.MAX_VALUE;
+
+ ResizingArrayQueue<Integer> c = new ResizingArrayQueue<Integer>();
+ SpecializedBFS aBFS = new SpecializedBFS(g, vv, c);
+ SpecializedBFS bBFS = new SpecializedBFS(g, ww, null);
+
+ for (int x : c) {
+ if (!bBFS.hasPathTo(x)) continue;
+ int l = aBFS.distTo(x) + bBFS.distTo(x);
+ if (len > l) {
+ len = l;
+ sol = x;
+ }
+ }
+
+ if (sol == -1) len = -1;
+
+ SAPResult res = new SAPResult(sol, len);
- return -1;
+ return res;
}
// for unit testing of this class (such as the one below)
public static void main(String[] args)
{
- In in = new In(args[0]);
- Digraph G = new Digraph(in);
- SAP sap = new SAP(G);
- while (!StdIn.isEmpty()) {
- int v = StdIn.readInt();
- int w = StdIn.readInt();
- int length = sap.length(v, w);
- int ancestor = sap.ancestor(v, w);
- StdOut.printf("length = %d, ancestor = %d\n", length, ancestor);
+ int length, ancestor;
+ In in;
+ Digraph G;
+ SAP sap;
+
+ in = new In("./data/digraph1.txt");
+ G = new Digraph(in);
+ sap = new SAP(G);
+
+ length = sap.length(3, 11);
+ ancestor = sap.ancestor(3, 11);
+ StdOut.printf("length = %d, ancestor = %d\n", length, ancestor);
+
+ length = sap.length(9, 12);
+ ancestor = sap.ancestor(9, 12);
+ StdOut.printf("length = %d, ancestor = %d\n", length, ancestor);
+
+ length = sap.length(7, 2);
+ ancestor = sap.ancestor(7, 2);
+ StdOut.printf("length = %d, ancestor = %d\n", length, ancestor);
+
+ length = sap.length(1, 6);
+ ancestor = sap.ancestor(1, 6);
+ StdOut.printf("length = %d, ancestor = %d\n", length, ancestor);
+
+ Stopwatch w1 = new Stopwatch();
+ for (int i = 1; i < 7; i++) {
+ String g = "./data/digraph"+i+".txt";
+ in = new In(g);
+ G = new Digraph(in);
+ sap = new SAP(G);
+ for (int v = 0; v < G.V(); v++) {
+ for (int w = 0; w < G.V(); w++) {
+ int a = sap.ancestor(v, w);
+ int l = sap.length(v, w);
+ // StdOut.printf("%s %d->%d : a:%d l:%d\n", g, v, w, a, l);
+ }
+ }
}
+ double a1 = w1.elapsedTime();
+ StdOut.printf("all solved in : %g \n", a1);
}
}
diff --git a/Algorithms/Part-II/1-WordNet/SpecializedBFS.java b/Algorithms/Part-II/1-WordNet/SpecializedBFS.java
new file mode 100644
index 0000000..5b4a635
--- /dev/null
+++ b/Algorithms/Part-II/1-WordNet/SpecializedBFS.java
@@ -0,0 +1,150 @@
+/* vim: set expandtab tabstop=4 shiftwidth=4 : */
+
+public class SpecializedBFS
+{
+ private static final int MAXINT = Integer.MAX_VALUE;
+
+ private boolean[] marked;
+ private int[] edgeTo;
+ private int[] distTo;
+
+ public SpecializedBFS(Digraph G, int s, ResizingArrayQueue<Integer> c)
+ {
+ init(G);
+ Queue<Integer> q = new Queue<Integer>();
+ marked[s] = true;
+ distTo[s] = 0;
+ q.enqueue(s);
+ if (c != null) c.enqueue(s);
+ bfs(G, q, c);
+ }
+
+ public SpecializedBFS(Digraph G, Iterable<Integer> ss,
+ ResizingArrayQueue<Integer> c)
+ {
+ init(G);
+ Queue<Integer> q = new Queue<Integer>();
+ for (int s : ss) {
+ marked[s] = true;
+ distTo[s] = 0;
+ q.enqueue(s);
+ if (c != null) c.enqueue(s);
+ }
+ bfs(G, q, c);
+ }
+
+ private void init(Digraph g)
+ {
+ marked = new boolean[g.V()];
+ edgeTo = new int[g.V()];
+ distTo = new int[g.V()];
+ for (int v = 0; v < g.V(); v++)
+ distTo[v] = MAXINT;
+ }
+
+ public int distTo(int v)
+ {
+ return distTo[v];
+ }
+
+ public boolean hasPathTo(int v)
+ {
+ return marked[v];
+ }
+
+ private void bfs(Digraph G, Queue<Integer> q, ResizingArrayQueue<Integer> c)
+ {
+ while (!q.isEmpty()) {
+ int v = q.dequeue();
+ for (int w : G.adj(v)) {
+ if (!marked[w]) {
+ edgeTo[w] = v;
+ distTo[w] = distTo[v] + 1;
+ marked[w] = true;
+ q.enqueue(w);
+ if (c != null) c.enqueue(w);
+ }
+ }
+ }
+ }
+
+ // change this to an iterator if efficiency is needed
+ public Iterable<Integer> pathTo(int v)
+ {
+ if (!hasPathTo(v)) return null;
+
+ Stack<Integer> path = new Stack<Integer>();
+ int x;
+ for (x = v; distTo[x] != 0; x = edgeTo[x])
+ path.push(x);
+ path.push(x);
+
+ return path;
+ }
+
+ public int pathToCount(int v)
+ {
+ if (!hasPathTo(v)) return MAXINT;
+
+ int count = 0;
+ for (int x = v; distTo[x] != 0; x = edgeTo[x])
+ count++;
+
+ return count;
+ }
+
+ public static void main(String[] args)
+ {
+ In in = new In("./data/digraph3.txt");
+ Digraph G = new Digraph(in);
+ StdOut.println(G);
+
+ int s = 7;
+ ResizingArrayQueue<Integer> components = new ResizingArrayQueue<Integer>();
+ SpecializedBFS bfs = new SpecializedBFS(G, s, components);
+
+ for (int v = 0; v < G.V(); v++) {
+ if (bfs.hasPathTo(v)) {
+ StdOut.printf("%2d -> %2d: [%d] ", s, v, bfs.distTo(v));
+ for (int x : bfs.pathTo(v)) {
+ if (x == s) StdOut.print(x);
+ else StdOut.print("->" + x);
+ }
+ StdOut.println();
+ } else {
+ StdOut.printf("%2d -> %2d: [-]\n", s, v);
+ }
+ }
+ StdOut.printf("components : ");
+ for (int c : components) {
+ if (c == s) StdOut.print(c);
+ else StdOut.print("->" + c);
+ }
+ StdOut.print("\n");
+ StdOut.printf("pathToCount(%d) : %2d\n", 4, bfs.pathToCount(4));
+ StdOut.printf("pathToCount(%d) : %2d\n", 9, bfs.pathToCount(9));
+
+ int s2 = 11;
+ SpecializedBFS bfs2 = new SpecializedBFS(G, s2, null);
+
+ for (int v = 0; v < G.V(); v++) {
+ if (bfs2.hasPathTo(v)) {
+ StdOut.printf("%2d -> %2d: [%d] ", s2, v, bfs2.distTo(v));
+ for (int x : bfs2.pathTo(v)) {
+ if (x == s2) StdOut.print(x);
+ else StdOut.print("->" + x);
+ }
+ StdOut.println();
+ } else {
+ StdOut.printf("%2d -> %2d: [-]\n", s2, v);
+ }
+ }
+
+ StdOut.printf("intersection : ");
+ for (int c : components) {
+ if (bfs2.hasPathTo(c))
+ StdOut.print(c+" ");
+ }
+ StdOut.print("\n");
+ }
+}
diff --git a/Algorithms/Part-II/1-WordNet/WordNet.java b/Algorithms/Part-II/1-WordNet/WordNet.java
index 830ccdf..2a854fa 100644
--- a/Algorithms/Part-II/1-WordNet/WordNet.java
+++ b/Algorithms/Part-II/1-WordNet/WordNet.java
@@ -1,58 +1,183 @@
/* vim: set expandtab tabstop=4 shiftwidth=4 : */
+import java.util.HashMap;
+import java.util.Iterator;
+
public class WordNet
{
// data type space linear in the input size
+ private HashMap<String, Bag<Integer>> wordsToIds;
+ private HashMap<Integer, String> idToString;
+ private Digraph dag;
+
+ private SAP sap;
// constructor takes the name of the two input files
+ // time linearithmic in the input size
public WordNet(String synsets, String hypernyms)
{
- // time linearithmic in the input size
+ int size = 0;
+ wordsToIds = new HashMap<String, Bag<Integer>>();
+ idToString = new HashMap<Integer, String>();
+
+ // load words from synsets
+ In syn = new In(synsets);
+ while (syn.hasNextLine()) {
+ String line = syn.readLine();
+ String[] fields = line.split(",");
+ int id = Integer.parseInt(fields[0]);
+ String[] words = fields[1].split(" ");
+ for (String word : words) {
+ Bag<Integer> bag = wordsToIds.get(word);
+ if (bag == null) {
+ bag = new Bag<Integer>();
+ bag.add(id);
+ wordsToIds.put(word, bag);
+ } else {
+ bag.add(id);
+ }
+ }
+ idToString.put(id, fields[1]);
+ size++;
+ }
+ syn.close();
+
+ dag = new Digraph(size);
+
+ boolean[] roots = new boolean[dag.V()];
+ for (int v = 0; v < dag.V(); v++)
+ roots[v] = true;
- // throw a java.lang.IllegalArgumentException
- // if the input does not correspond to a rooted DAG
+ // load edges from hypernyms
+ In hyp = new In(hypernyms);
+ while (hyp.hasNextLine()) {
+ String line = hyp.readLine();
+ String[] fields = line.split(",");
+ int id = Integer.parseInt(fields[0]);
+ if (fields.length > 1) roots[id] = false;
+ for (int i = 1; i < fields.length; i++) {
+ dag.addEdge(id, Integer.parseInt(fields[i]));
+ }
+ }
+ hyp.close();
+
+ int rootsCount = 0;
+ for (int v = 0; v < dag.V(); v++)
+ if (roots[v]) rootsCount++;
+ if (rootsCount != 1)
+ throw new IllegalArgumentException("roots "+rootsCount);
+
+ checkIndegrees();
+
+ sap = new SAP(dag);
+ }
+
+ private void checkIndegrees()
+ {
+ int[] indegree = new int[dag.V()];
+
+ // compute indegrees
+ for (int v = 0; v < dag.V(); v++) {
+ for (int w : dag.adj(v)) {
+ indegree[w]++;
+ }
+ }
+
+ // initialize queue to contain all vertices with indegree = 0 (leaves)
+ Queue<Integer> queue = new Queue<Integer>();
+ for (int v = 0; v < dag.V(); v++)
+ if (indegree[v] == 0) queue.enqueue(v);
+
+ // BFS and decrease indegrees
+ for (int j = 0; !queue.isEmpty(); j++) {
+ int v = queue.dequeue();
+ for (int w : dag.adj(v)) {
+ indegree[w]--;
+ if (indegree[w] == 0) queue.enqueue(w);
+ }
+ }
+
+ // check that all indegrees are 0
+ for (int v = 0; v < indegree.length; v++) {
+ if (indegree[v] != 0)
+ throw new IllegalArgumentException("indegrees");
+ }
}
// the set of nouns (no duplicates), returned as an Iterable
public Iterable<String> nouns()
{
- return null;
+ if (wordsToIds == null) return null;
+ return wordsToIds.keySet();
}
// is the word a WordNet noun?
public boolean isNoun(String word)
{
- // run in time logarithmic in the number of nouns
-
- return false;
+ if (wordsToIds == null) return false;
+ return wordsToIds.containsKey(word);
}
// distance between nounA and nounB (defined below)
public int distance(String nounA, String nounB)
{
- // run in time linear in the size of the WordNet digraph
+ if (!isNoun(nounA) || !isNoun(nounB))
+ throw new IllegalArgumentException();
+
+ Iterable<Integer> it0 = nounIterable(nounA);
+ Iterable<Integer> it1 = nounIterable(nounB);
- // throw java.lang.IllegalArgumentException
- // unless both of the noun arguments are WordNet nouns
+ return sap.length(it0, it1);
+ }
- return 0;
+ private Iterable<Integer> nounIterable(final String noun) {
+ return new Iterable<Integer>() {
+ public Iterator<Integer> iterator() {
+ return wordsToIds.get(noun).iterator();
+ }
+ };
}
// a synset that is the common ancestor of nounA and nounB
// in a shortest ancestral path
public String sap(String nounA, String nounB)
{
- // run in time linear in the size of the WordNet digraph
+ if (!isNoun(nounA) || !isNoun(nounB))
+ throw new java.lang.IllegalArgumentException();
- // throw java.lang.IllegalArgumentException
- // unless both of the noun arguments are WordNet nouns
+ Iterable<Integer> it0 = nounIterable(nounA);
+ Iterable<Integer> it1 = nounIterable(nounB);
- return "";
+ return idToString.get(sap.ancestor(it0, it1));
}
// for unit testing of this class
public static void main(String[] args)
{
+ WordNet wordNet = new WordNet("./data/synsets.txt", "./data/hypernyms.txt");
+
+ StdOut.println("23 white_marlin, mileage: "
+ + wordNet.distance("white_marlin", "mileage"));
+ StdOut.println("32 Black_Plague, black_marlin: "
+ + wordNet.distance("Black_Plague", "black_marlin"));
+ StdOut.println("32 American_water_spaniel, histology: "
+ + wordNet.distance("American_water_spaniel", "histology"));
+ StdOut.println("32 Brown_Swiss, barrel_roll: "
+ + wordNet.distance("Brown_Swiss", "barrel_roll"));
+ StdOut.println("bedspring Carolus_Linnaeus: "
+ + wordNet.sap("bedspring", "Carolus_Linnaeus"));
+ try {
+ StdOut.println("check roots");
+ new WordNet("./data/synsets.txt", "./data/hypernymsInvalidTwoRoots.txt");
+ StdOut.println("BAD");
+ }
+ catch (IllegalArgumentException e) { StdOut.println(e.getMessage()); }
+ try {
+ StdOut.println("check cycles");
+ new WordNet("./data/synsets.txt", "./data/hypernymsInvalidCycle.txt");
+ StdOut.println("BAD");
+ }
+ catch (IllegalArgumentException e) { StdOut.println(e.getMessage()); }
}
}
diff --git a/Algorithms/Part-II/1-WordNet/run.sh b/Algorithms/Part-II/1-WordNet/run.sh
index cd95d44..8c01871 100755
--- a/Algorithms/Part-II/1-WordNet/run.sh
+++ b/Algorithms/Part-II/1-WordNet/run.sh
@@ -2,7 +2,7 @@
export "CLASSPATH=$CLASSPATH:.:$HOME/algs4/algs4.jar:$HOME/algs4/stdlib.jar"
-CLASSES="WordNet SAP Outcast"
+CLASSES="SpecializedBFS WordNet SAP Outcast"
rm *.class *.zip 2>/dev/null