@@ -1063,6 +1063,147 @@ public void readXMLAddDepBrokenAnnotation() {
1063
1063
}
1064
1064
}
1065
1065
1066
+ /**
1067
+ * Test a basic case of two nodes that should be merged
1068
+ *<br>
1069
+ * The indices should be changed as well
1070
+ */
1071
+ @ Test
1072
+ public void readXMLMergeNodes () {
1073
+ Ssurgeon inst = Ssurgeon .inst ();
1074
+
1075
+ // Test the head word being the first word
1076
+ String merge = String .join (newline ,
1077
+ "<ssurgeon-pattern-list>" ,
1078
+ " <ssurgeon-pattern>" ,
1079
+ " <uid>38</uid>" ,
1080
+ " <notes>Merge two nodes that should not have been split</notes>" ,
1081
+ " <semgrex>" + XMLUtils .escapeXML ("{word:prof}=source >punct ({}=punct . {} !> {})" ) + "</semgrex>" ,
1082
+ " <edit-list>mergeNodes source punct</edit-list>" ,
1083
+ " </ssurgeon-pattern>" ,
1084
+ "</ssurgeon-pattern-list>" );
1085
+ List <SsurgeonPattern > patterns = inst .readFromString (merge );
1086
+ assertEquals (patterns .size (), 1 );
1087
+ SsurgeonPattern mergeSsurgeon = patterns .get (0 );
1088
+
1089
+ SemanticGraph sg = SemanticGraph .valueOf ("[fare-7 aux> potrebbe-6 nsubj> [prof-3 det> Il-2 punct> .-4 nmod> Fotticchia-5] obj> [gag-9 det> una-8] obl> [situazione-12 case> su-10 det> la-11]]" , Language .UniversalEnglish );
1090
+ SemanticGraph newSG = mergeSsurgeon .iterate (sg ).first ;
1091
+ SemanticGraph expected = SemanticGraph .valueOf ("[fare-6 aux> potrebbe-5 nsubj> [prof.-3 det> Il-2 nmod> Fotticchia-4] obj> [gag-8 det> una-7] obl> [situazione-11 case> su-9 det> la-10]]" , Language .UniversalEnglish );
1092
+ assertEquals (expected , newSG );
1093
+ IndexedWord prof = sg .getNodeByIndexSafe (3 );
1094
+ assertNotNull (prof );
1095
+ assertEquals ("prof." , prof .word ());
1096
+ assertEquals ("prof." , prof .value ());
1097
+ assertNull (prof .lemma ());
1098
+
1099
+ // Same test, but this time test merging the lemmas
1100
+ sg = SemanticGraph .valueOf ("[fare-7 aux> potrebbe-6 nsubj> [prof-3 det> Il-2 punct> .-4 nmod> Fotticchia-5] obj> [gag-9 det> una-8] obl> [situazione-12 case> su-10 det> la-11]]" , Language .UniversalEnglish );
1101
+ sg .getNodeByIndexSafe (3 ).setLemma ("prof" );
1102
+ sg .getNodeByIndexSafe (4 ).setLemma ("." );
1103
+ newSG = mergeSsurgeon .iterate (sg ).first ;
1104
+ assertEquals (expected , newSG );
1105
+ prof = sg .getNodeByIndexSafe (3 );
1106
+ assertEquals ("prof." , prof .lemma ());
1107
+
1108
+ // Test the head word being the second word
1109
+ merge = String .join (newline ,
1110
+ "<ssurgeon-pattern-list>" ,
1111
+ " <ssurgeon-pattern>" ,
1112
+ " <uid>38</uid>" ,
1113
+ " <notes>Merge two nodes that should not have been split</notes>" ,
1114
+ " <semgrex>" + XMLUtils .escapeXML ("{word:prof}=source >punct ({}=punct . {} !> {})" ) + "</semgrex>" ,
1115
+ " <edit-list>mergeNodes source punct</edit-list>" ,
1116
+ " </ssurgeon-pattern>" ,
1117
+ "</ssurgeon-pattern-list>" );
1118
+ patterns = inst .readFromString (merge );
1119
+ assertEquals (patterns .size (), 1 );
1120
+ mergeSsurgeon = patterns .get (0 );
1121
+
1122
+ // Check what happens if the root of the phrase is on the right and the dep is on the left
1123
+ // The words & lemmas should still hopefully be merged in order
1124
+ sg = SemanticGraph .valueOf ("[fare-7 aux> potrebbe-6 nsubj> [prof-4 det> Il-2 punct> .-3 nmod> Fotticchia-5] obj> [gag-9 det> una-8] obl> [situazione-12 case> su-10 det> la-11]]" , Language .UniversalEnglish );
1125
+ sg .getNodeByIndexSafe (3 ).setLemma ("." );
1126
+ assertEquals ("." , sg .getNodeByIndexSafe (3 ).word ());
1127
+ sg .getNodeByIndexSafe (4 ).setLemma ("prof" );
1128
+ newSG = mergeSsurgeon .iterate (sg ).first ;
1129
+ expected = SemanticGraph .valueOf ("[fare-6 aux> potrebbe-5 nsubj> [.prof-3 det> Il-2 nmod> Fotticchia-4] obj> [gag-8 det> una-7] obl> [situazione-11 case> su-9 det> la-10]]" , Language .UniversalEnglish );
1130
+ assertEquals (expected , newSG );
1131
+ prof = newSG .getNodeByIndexSafe (3 );
1132
+ assertEquals (".prof" , prof .word ());
1133
+ assertEquals (".prof" , prof .lemma ());
1134
+ }
1135
+
1136
+
1137
+ /**
1138
+ * Test a basic case of two nodes that should be merged
1139
+ *<br>
1140
+ * The indices should be changed as well
1141
+ */
1142
+ @ Test
1143
+ public void readXMLMergeNodesAttributes () {
1144
+ Ssurgeon inst = Ssurgeon .inst ();
1145
+
1146
+ // Test the head word being the first word
1147
+ String merge = String .join (newline ,
1148
+ "<ssurgeon-pattern-list>" ,
1149
+ " <ssurgeon-pattern>" ,
1150
+ " <uid>38</uid>" ,
1151
+ " <notes>Merge two nodes that should not have been split</notes>" ,
1152
+ " <semgrex>" + XMLUtils .escapeXML ("{word:prof}=source >punct ({}=punct . {} !> {})" ) + "</semgrex>" ,
1153
+ " <edit-list>mergeNodes source punct -word foo -lemma bar</edit-list>" ,
1154
+ " </ssurgeon-pattern>" ,
1155
+ "</ssurgeon-pattern-list>" );
1156
+ List <SsurgeonPattern > patterns = inst .readFromString (merge );
1157
+ assertEquals (patterns .size (), 1 );
1158
+ SsurgeonPattern mergeSsurgeon = patterns .get (0 );
1159
+
1160
+ SemanticGraph sg = SemanticGraph .valueOf ("[fare-7 aux> potrebbe-6 nsubj> [prof-3 det> Il-2 punct> .-4 nmod> Fotticchia-5] obj> [gag-9 det> una-8] obl> [situazione-12 case> su-10 det> la-11]]" , Language .UniversalEnglish );
1161
+ SemanticGraph newSG = mergeSsurgeon .iterate (sg ).first ;
1162
+ SemanticGraph expected = SemanticGraph .valueOf ("[fare-6 aux> potrebbe-5 nsubj> [foo-3 det> Il-2 nmod> Fotticchia-4] obj> [gag-8 det> una-7] obl> [situazione-11 case> su-9 det> la-10]]" , Language .UniversalEnglish );
1163
+ assertEquals (expected , newSG );
1164
+ IndexedWord prof = sg .getNodeByIndexSafe (3 );
1165
+ assertNotNull (prof );
1166
+ assertEquals ("foo" , prof .word ());
1167
+ assertEquals ("foo" , prof .value ());
1168
+ assertEquals ("bar" , prof .lemma ());
1169
+ }
1170
+
1171
+ /**
1172
+ * Test a basic case of two nodes that should be merged
1173
+ *<br>
1174
+ * The indices should be changed as well
1175
+ */
1176
+ @ Test
1177
+ public void readXMLMergeNodesFailCases () {
1178
+ Ssurgeon inst = Ssurgeon .inst ();
1179
+
1180
+ // use "dep" as the dependency so as to be language-agnostic in this test
1181
+ String merge = String .join (newline ,
1182
+ "<ssurgeon-pattern-list>" ,
1183
+ " <ssurgeon-pattern>" ,
1184
+ " <uid>38</uid>" ,
1185
+ " <notes>Merge two nodes that should not have been split</notes>" ,
1186
+ " <semgrex>" + XMLUtils .escapeXML ("{word:prof}=source >punct ({}=punct . {} !> {})" ) + "</semgrex>" ,
1187
+ " <edit-list>mergeNodes source punct</edit-list>" ,
1188
+ " </ssurgeon-pattern>" ,
1189
+ "</ssurgeon-pattern-list>" );
1190
+ List <SsurgeonPattern > patterns = inst .readFromString (merge );
1191
+ assertEquals (patterns .size (), 1 );
1192
+ SsurgeonPattern mergeSsurgeon = patterns .get (0 );
1193
+
1194
+ // Add an extra edge from the punct we want to squash to somewhere else
1195
+ // The graph should not be changed
1196
+ SemanticGraph sg = SemanticGraph .valueOf ("[fare-7 aux> potrebbe-6 nsubj> [prof-3 det> Il-2 nmod> Fotticchia-5 punct> [.-4 nmod> Fotticchia-5]] obj> [gag-9 det> una-8] obl> [situazione-12 case> su-10 det> la-11]]" , Language .UniversalEnglish );
1197
+ SemanticGraph newSG = mergeSsurgeon .iterate (sg ).first ;
1198
+ SemanticGraph expected = SemanticGraph .valueOf ("[fare-7 aux> potrebbe-6 nsubj> [prof-3 det> Il-2 nmod> Fotticchia-5 punct> [.-4 nmod> Fotticchia-5]] obj> [gag-9 det> una-8] obl> [situazione-12 case> su-10 det> la-11]]" , Language .UniversalEnglish );
1199
+ assertEquals (expected , newSG );
1200
+
1201
+ sg = SemanticGraph .valueOf ("[fare-7 aux> potrebbe-6 nsubj> [prof-3 det> Il-2 nmod> [Fotticchia-5 punct> .-4] punct> .-4] obj> [gag-9 det> una-8] obl> [situazione-12 case> su-10 det> la-11]]" , Language .UniversalEnglish );
1202
+ newSG = mergeSsurgeon .iterate (sg ).first ;
1203
+ expected = SemanticGraph .valueOf ("[fare-7 aux> potrebbe-6 nsubj> [prof-3 det> Il-2 nmod> [Fotticchia-5 punct> .-4] punct> .-4] obj> [gag-9 det> una-8] obl> [situazione-12 case> su-10 det> la-11]]" , Language .UniversalEnglish );
1204
+ assertEquals (expected , newSG );
1205
+ }
1206
+
1066
1207
/**
1067
1208
* The AddDep should update the matches in the SemgrexMatcher.
1068
1209
* If that isn't done correctly, then moving the words first
0 commit comments