
Commit b118082

Add a Java command line tool which converts trees to dependency graphs using protobufs. Also update SemanticGraph.valueOf to set a sentIndex, and add an option in Tree to yield CoreLabels with the word as the value instead of the tag.
1 parent 92771b4 commit b118082

4 files changed: +191 -10 lines

src/edu/stanford/nlp/semgraph/SemanticGraph.java

+24 -6

@@ -1693,18 +1693,31 @@ public SemanticGraphEdge addEdge(SemanticGraphEdge edge) {
    *
    * This is the same format generated by toCompactString().
    */
+  public static SemanticGraph valueOf(String s, Language language, Integer sentIndex) {
+    return (new SemanticGraphParsingTask(s, language, sentIndex)).parse();
+  }
+
+  /**
+   * @see SemanticGraph#valueOf(String, Language, Integer)
+   */
   public static SemanticGraph valueOf(String s, Language language) {
     return (new SemanticGraphParsingTask(s, language)).parse();
   }
 
   /**
-   *
-   * @see SemanticGraph#valueOf(String, Language)
+   * @see SemanticGraph#valueOf(String, Language, Integer)
    */
   public static SemanticGraph valueOf(String s) {
     return valueOf(s, Language.UniversalEnglish);
   }
 
+  /**
+   * @see SemanticGraph#valueOf(String, Language, Integer)
+   */
+  public static SemanticGraph valueOf(String s, int sentIndex) {
+    return valueOf(s, Language.UniversalEnglish, sentIndex);
+  }
+
 
   public SemanticGraph() {
     graph = new DirectedMultiGraph<>(outerMapFactory, innerMapFactory);

@@ -1838,16 +1851,21 @@ private static class SemanticGraphParsingTask extends StringParsingTask<SemanticGraph> {
 
     private SemanticGraph sg;
     private Set<Integer> indexesUsed = Generics.newHashSet();
-    private Language language;
-
+    private final Language language;
+    private final Integer sentIndex;
 
     public SemanticGraphParsingTask(String s) {
-      this(s, Language.UniversalEnglish);
+      this(s, Language.UniversalEnglish, null);
     }
 
     public SemanticGraphParsingTask(String s, Language language) {
+      this(s, language, null);
+    }
+
+    public SemanticGraphParsingTask(String s, Language language, Integer sentIndex) {
       super(s);
       this.language = language;
+      this.sentIndex = sentIndex;
     }
 
     @Override

@@ -1909,7 +1927,7 @@ private IndexedWord makeVertex(String word) {
       // nothing is actually enforcing that no indexes are used twice. This
       // could occur if some words in the string representation being parsed
       // come with index markers and some do not.
-      IndexedWord ifl = new IndexedWord(null, 0, index);
+      IndexedWord ifl = new IndexedWord(null, sentIndex != null ? sentIndex : 0, index);
       // log.info("SemanticGraphParsingTask>>> word = " + word);
       // log.info("SemanticGraphParsingTask>>> index = " + index);
       // log.info("SemanticGraphParsingTask>>> indexesUsed = " +

src/edu/stanford/nlp/trees/ProcessDependencyConverterRequest.java (new file)

+75 -0

@@ -0,0 +1,75 @@
package edu.stanford.nlp.trees;

/**
 * A tool to turn Tree objects into dependencies
 *
 * Only works for English (at least for now)
 */

import java.io.InputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import java.util.stream.Collectors;

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.ProcessProtobufRequest;

public class ProcessDependencyConverterRequest extends ProcessProtobufRequest {
  /**
   * Convert a single Tree to basic dependencies
   */
  static SemanticGraph convert(Tree tree) {
    SemanticGraph uncollapsedDeps = SemanticGraphFactory.generateUncollapsedDependencies(tree);
    return uncollapsedDeps;
  }

  /**
   * Process a single request, responding with basic dependencies for each tree
   */
  static CoreNLPProtos.DependencyConverterResponse processRequest(CoreNLPProtos.DependencyConverterRequest request) {
    ProtobufAnnotationSerializer serializer = new ProtobufAnnotationSerializer();
    CoreNLPProtos.DependencyConverterResponse.Builder responseBuilder = CoreNLPProtos.DependencyConverterResponse.newBuilder();

    List<CoreNLPProtos.FlattenedParseTree> flattenedTrees = request.getTreesList();
    int treeIdx = 0;
    for (CoreNLPProtos.FlattenedParseTree flattenedTree : flattenedTrees) {
      Tree tree = ProtobufAnnotationSerializer.fromProto(flattenedTree);
      SemanticGraph graph = convert(tree);
      for (IndexedWord node : graph.vertexSet()) {
        node.set(CoreAnnotations.SentenceIndexAnnotation.class, treeIdx);
      }
      CoreNLPProtos.DependencyConverterResponse.DependencyConversion.Builder conversionBuilder = CoreNLPProtos.DependencyConverterResponse.DependencyConversion.newBuilder();
      conversionBuilder.setGraph(ProtobufAnnotationSerializer.toProto(graph));
      conversionBuilder.setTree(flattenedTree);
      responseBuilder.addConversions(conversionBuilder.build());
      ++treeIdx;
    }
    return responseBuilder.build();
  }

  /**
   * Process a single request from a stream, responding with basic dependencies for each tree
   */
  @Override
  public void processInputStream(InputStream in, OutputStream out) throws IOException {
    CoreNLPProtos.DependencyConverterRequest request = CoreNLPProtos.DependencyConverterRequest.parseFrom(in);
    CoreNLPProtos.DependencyConverterResponse response = processRequest(request);
    response.writeTo(out);
  }

  /**
   * The inherited main program will either enhance a single document,
   * or will listen to stdin and enhance every document that comes in
   * until a terminator is sent or the stream closes
   */
  public static void main(String[] args) throws IOException {
    ProcessProtobufRequest.process(new ProcessDependencyConverterRequest(), args);
  }
}
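
For orientation, a hedged in-process sketch of the request/response round trip. The proto accessors mirror those used in the diff and the unit test below; the demo class itself is hypothetical, and it sits in the edu.stanford.nlp.trees package because processRequest is package-private.

package edu.stanford.nlp.trees;  // hypothetical demo class, not in the commit

import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;

public class ConverterSketch {
  public static void main(String[] args) {
    // Build a request holding one flattened parse tree...
    Tree tree = Tree.valueOf("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ has) (NP (JJ nice) (NNS antennae)))))");
    CoreNLPProtos.DependencyConverterRequest request =
        CoreNLPProtos.DependencyConverterRequest.newBuilder()
            .addTrees(ProtobufAnnotationSerializer.toFlattenedTree(tree))
            .build();
    // ...convert it, then read back the dependency graph proto for that tree.
    CoreNLPProtos.DependencyConverterResponse response =
        ProcessDependencyConverterRequest.processRequest(request);
    System.out.println(response.getConversions(0).getGraph());
  }
}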

src/edu/stanford/nlp/trees/Tree.java

+20 -4

@@ -1625,12 +1625,24 @@ public List<LabeledWord> labeledYield(List<LabeledWord> ty) {
    * @return A tagged, labeled yield.
    */
   public List<CoreLabel> taggedLabeledYield() {
+    return taggedLabeledYield(true);
+  }
+
+
+  /** Returns a {@code List<CoreLabel>} from the tree.
+   * These are a copy of the complete token representation
+   * along with the tag.
+   *
+   * @param tagValues use the tags for the values (otherwise use the leaf)
+   * @return A tagged, labeled yield.
+   */
+  public List<CoreLabel> taggedLabeledYield(boolean tagValues) {
     List<CoreLabel> ty = new ArrayList<>();
-    taggedLabeledYield(ty, 0);
+    taggedLabeledYield(ty, 0, tagValues);
     return ty;
   }
 
-  private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) {
+  private int taggedLabeledYield(List<CoreLabel> ty, int termIdx, boolean tagValues) {
     if (isPreTerminal()) {
       // usually this will fill in all the usual keys for a token
       CoreLabel taggedWord = new CoreLabel(firstChild().label());

@@ -1640,7 +1652,11 @@ private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) {
       }
       final String tag = (value() == null) ? "" : value();
       // set value and tag to the tag
-      taggedWord.setValue(tag);
+      if (tagValues) {
+        taggedWord.setValue(tag);
+      } else {
+        taggedWord.setValue(taggedWord.word());
+      }
       taggedWord.setTag(tag);
       taggedWord.setIndex(termIdx);
       ty.add(taggedWord);

@@ -1649,7 +1665,7 @@ private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) {
 
     } else {
       for (Tree kid : getChildrenAsList())
-        termIdx = kid.taggedLabeledYield(ty, termIdx);
+        termIdx = kid.taggedLabeledYield(ty, termIdx, tagValues);
     }
 
     return termIdx;
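
A hedged sketch (hypothetical demo class, not part of the commit) contrasting the two modes: with no argument the CoreLabel values are the POS tags, exactly as before, while passing false keeps the words as the values; either way the tag and the token index are still set. This is a sketch assuming the leaf tokens' words are available on the labels.

import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.Tree;

public class TaggedYieldSketch {  // hypothetical demo class, not in the commit
  public static void main(String[] args) {
    Tree tree = Tree.valueOf("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ sleeps))))");
    List<CoreLabel> tagsAsValues = tree.taggedLabeledYield();       // values are the tags: NNP, VBZ
    List<CoreLabel> wordsAsValues = tree.taggedLabeledYield(false); // values are the words: Jennifer, sleeps
    for (int i = 0; i < wordsAsValues.size(); ++i) {
      // Both variants set the tag and the token index on each CoreLabel.
      CoreLabel token = wordsAsValues.get(i);
      System.out.println(token.value() + "/" + token.tag() + "-" + token.index());
    }
  }
}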

ProcessDependencyConverterRequestTest.java (new JUnit test, package edu.stanford.nlp.trees)

+72 -0

@@ -0,0 +1,72 @@
package edu.stanford.nlp.trees;

import java.util.List;
import java.util.stream.Collectors;

import org.junit.Assert;
import org.junit.Test;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.trees.Tree;

public class ProcessDependencyConverterRequestTest {

  static CoreNLPProtos.DependencyConverterRequest buildRequest(String ... trees) {
    CoreNLPProtos.DependencyConverterRequest.Builder builder = CoreNLPProtos.DependencyConverterRequest.newBuilder();

    for (String tree : trees) {
      Tree t = Tree.valueOf(tree);
      builder.addTrees(ProtobufAnnotationSerializer.toFlattenedTree(t));
    }

    return builder.build();
  }

  static void checkResults(CoreNLPProtos.DependencyConverterResponse response, String ... expectedResults) {
    Assert.assertEquals(expectedResults.length, response.getConversionsList().size());
    for (int i = 0; i < expectedResults.length; ++i) {
      CoreNLPProtos.DependencyGraph responseGraph = response.getConversionsList().get(i).getGraph();
      CoreNLPProtos.FlattenedParseTree responseTree = response.getConversionsList().get(i).getTree();
      Tree tree = ProtobufAnnotationSerializer.fromProto(responseTree);
      List<CoreLabel> sentence = tree.taggedLabeledYield(false);

      SemanticGraph expected = SemanticGraph.valueOf(expectedResults[i], i);
      SemanticGraph graph = ProtobufAnnotationSerializer.fromProto(responseGraph, sentence, null);
      //for (IndexedWord word : expected.vertexSet()) {
      //  System.out.println(word + " " + word.index() + " " + word.sentIndex() + " " + word.docID());
      //}
      //for (IndexedWord word : graph.vertexSet()) {
      //  System.out.println(word + " " + word.index() + " " + word.sentIndex() + " " + word.docID());
      //}
      //System.out.println(expected.toCompactString());
      //System.out.println(graph.toCompactString());
      Assert.assertEquals(expected, graph);
    }
  }

  /** Test a single Tree turning into Dependencies */
  @Test
  public void testOneTree() {
    CoreNLPProtos.DependencyConverterRequest request = buildRequest("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ has) (NP (JJ nice) (NNS antennae)))))");
    CoreNLPProtos.DependencyConverterResponse response = ProcessDependencyConverterRequest.processRequest(request);
    checkResults(response, "[has/VBZ-1 nsubj>Jennifer/NNP-0 obj>[antennae/NNS-3 amod>nice/JJ-2]]");
  }

  /** Test two trees turning into Dependencies */
  @Test
  public void testTwoTrees() {
    CoreNLPProtos.DependencyConverterRequest request = buildRequest("(ROOT (S (NP (NNP Jennifer)) (VP (VBZ has) (NP (JJ nice) (NNS antennae)))))",
                                                                    "(ROOT (S (NP (PRP She)) (VP (VBZ is) (ADJP (RB hella) (JJ basic)) (ADVP (RB though)))))");
    CoreNLPProtos.DependencyConverterResponse response = ProcessDependencyConverterRequest.processRequest(request);
    checkResults(response,
                 "[has/VBZ-1 nsubj>Jennifer/NNP-0 obj>[antennae/NNS-3 amod>nice/JJ-2]]",
                 "[basic/JJ-3 nsubj>She/PRP-0 cop>is/VBZ-1 advmod>hella/RB-2 advmod>though/RB-4]");
  }

}
