@@ -49,7 +49,53 @@ public function extractFromContent($content, MessageCatalogue $catalogue, $domai
4949 }
5050
5151 if ('2.0 ' === $ xliffVersion ) {
52- NSA ::invokeMethod ($ this , 'extractXliff2 ' , $ dom , $ catalogue , $ domain );
52+ $ this ->extractXliff2 ($ dom , $ catalogue , $ domain );
53+ }
54+ }
55+
/**
 * Extract messages and their metadata from an XLIFF 2.0 document.
 *
 * Every <segment> of every <unit> becomes one catalogue message: the segment's
 * source text is the message id and its target text (or the source text when
 * the segment has no target) is the translation. The target's attributes and
 * the unit's notes are stored as metadata for that message.
 *
 * @param \DOMDocument     $dom       Parsed XLIFF 2.0 document
 * @param MessageCatalogue $catalogue Catalogue receiving the messages and metadata
 * @param string           $domain    Domain the messages are stored under
 */
private function extractXliff2(\DOMDocument $dom, MessageCatalogue $catalogue, $domain)
{
    $xml = simplexml_import_dom($dom);

    // DOMDocument::$encoding is null when the XML declaration names no encoding;
    // only normalise it when present so strtoupper() never receives null.
    $encoding = $dom->encoding ? strtoupper($dom->encoding) : null;

    $xml->registerXPathNamespace('xliff', 'urn:oasis:names:tc:xliff:document:2.0');

    foreach ($xml->xpath('//xliff:unit') as $unit) {
        // Notes are attached to the unit, so collect them once and share them
        // between all of the unit's segments.
        $notes = null;
        if (isset($unit->notes)) {
            $notes = [];
            foreach ($unit->notes->note as $noteNode) {
                $note = [];
                foreach ($noteNode->attributes() as $key => $value) {
                    $note[$key] = (string) $value;
                }
                $note['content'] = (string) $noteNode;
                $notes[] = $note;
            }
        }

        // A unit may contain several segments; each one is a message of its own.
        foreach ($unit->segment as $segment) {
            $source = (string) $segment->source;
            $hasTarget = isset($segment->target);

            // If the xlf file has another encoding specified, try to convert it because
            // simple_xml will always return utf-8 encoded values
            $target = $this->utf8ToCharset((string) ($hasTarget ? $segment->target : $segment->source), $encoding);

            $catalogue->set($source, $target, $domain);

            $metadata = [];
            if ($hasTarget && $segment->target->attributes()) {
                $metadata['target-attributes'] = [];
                foreach ($segment->target->attributes() as $key => $value) {
                    $metadata['target-attributes'][$key] = (string) $value;
                }
            }

            if (null !== $notes) {
                $metadata['notes'] = $notes;
            }

            $catalogue->setMetadata($source, $metadata, $domain);
        }
    }
}
55101
@@ -121,4 +167,21 @@ private function getXmlErrors($internalErrors)
121167
122168 return $ errors ;
123169 }
170+
/**
 * Re-encode a UTF-8 string into another character set.
 *
 * The input is returned unchanged when no encoding is given or when the
 * requested encoding is already 'UTF-8'.
 *
 * @param string      $content  UTF-8 encoded string to convert
 * @param string|null $encoding Name of the character set to convert to
 *
 * @return string
 */
private function utf8ToCharset($content, $encoding = null)
{
    // Nothing to do: no target charset requested, or it is already UTF-8.
    if (empty($encoding) || 'UTF-8' === $encoding) {
        return $content;
    }

    return mb_convert_encoding($content, $encoding, 'UTF-8');
}
124187}
0 commit comments