1 ;;; xpath.el --- XPATH implementation
3 ;; Copyright (C) 2001 Alex Schroeder <alex@gnu.org>
5 ;; Author: Alex Schroeder <alex@gnu.org>
6 ;; Maintainer: Oliver Scholz <epameinondas@gmx.de>
9 ;; URL: http://www.emacswiki.org/cgi-bin/wiki.pl?XmlParser
10 ;; Version: $Id: xpath.el,v 1.1 2003/12/16 00:32:00 egoge Exp egoge $
12 ;; This file is not part of GNU Emacs.
14 ;; This is free software; you can redistribute it and/or modify it under
15 ;; the terms of the GNU General Public License as published by the Free
16 ;; Software Foundation; either version 2, or (at your option) any later
19 ;; This is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
27 ;; Boston, MA 02111-1307, USA.
31 ;; If you are working with XML documents, you may parse the documents
32 ;; using the XML parser included with Emacs (xml.el), and pass the data
33 ;; structure to the DOM implementation (dom.el). You can then use XPATH
38 ;; The test code assumes a file named sample.xml with the following
41 ;; <book id="compiler">
44 ;; <title>My own book!</title>
45 ;; <edition>First</edition>
48 ;; <firstname>John</firstname>
49 ;; <surname>Wiegley</surname>
55 ;; <title>A very small chapter</title>
56 ;; <para>Wonder where the content is...</para>
64 (require 'xpath-parser)
68 (defun xpath-follow-axis (node axis)
69 "Return all the nodes on AXIS relative to node.
70 AXIS must be a string used in `xpath-axes'."
71 (let ((func (cadr (assoc axis xpath-axes))))
74 (error "Unknown axis: " axis))))
76 (defun xpath-ancestor-axis (node)
77 "Return the elements on the ancestor axis.
78 The ancestor axis contains the ancestors of the context node. The
79 ancestors of the context node consist of the parent of context node and
80 the parent's parent and so on. Thus, the ancestor axis will always
81 include the root node, unless the context node is the root node.
83 See `dom-node-parent-node'."
84 (let ((parent (dom-node-parent-node node))
87 (setq result (cons parent result)
88 parent (dom-node-parent-node parent)))
91 (defun xpath-ancestor-or-self-axis (node)
92 "Return NODE and the elements of the ancestor axis.
93 The ancestor-or-self axis contains the context node and the ancestors of
94 the context node. Thus, the ancestor axis will always include the root
97 See `xpath-ancestor-axis'."
98 (cons node (xpath-ancestor-axis node)))
100 (defun xpath-attribute-axis (node)
101 "Return the elements of the attribute axis.
102 The attribute axis contains the attributes of the context node. The
103 axis will be empty unless the context node is an element.
105 See `dom-node-attributes'."
106 (dom-node-attributes node))
108 (defun xpath-child-axis (node)
109 "Return the elements of the child axis.
110 The child axis contains the children of the context node.
112 See `dom-node-child-nodes'."
113 (dom-node-child-nodes node))
115 (defun xpath-descendant-axis (node)
116 "Return the elements of the descendant axis.
117 The descendant axis contains the descendants of the context node. A
118 descendant is a child or a child of a child and so on. Thus the
119 descendant axis never contains attribute or namespace nodes."
120 ;; We don't want to call this recursively because of performance.
121 (setq node (dom-node-first-child node))
124 (setq result (cons node result)
125 node (cond ((dom-node-first-child node)
126 (when (dom-node-next-sibling node)
127 (push (dom-node-next-sibling node) stack))
128 (dom-node-first-child node))
129 ((dom-node-next-sibling node))
133 (defun xpath-descendant-or-self-axis (node)
134 "Return the elements of the descendant-or-self axis.
135 The descendant-or-self axis contains the context node and the
136 descendants of the context node.
138 See `xpath-descendant-axis'."
139 (cons node (xpath-descendant-axis node)))
141 (defun xpath-following-axis (node)
142 "Return the elements of the following axis.
143 The following axis contains all nodes in the same document as the
144 context node that are after the context node in document order,
145 excluding any descendants and excluding attribute nodes and namespace
147 ;; We don't want to call this recursively because of performance.
148 (let ((ancestors (xpath-ancestor-or-self-axis node))
150 ;; The stack holds all the ancestors which have a next sibling.
151 ;; Note that this is very very inefficient if dom-node-next-sibling
152 ;; is very inefficient (as it currently is).
153 (dolist (ancestor ancestors)
154 (let ((next-sibling (dom-node-next-sibling ancestor)))
156 (setq stack (cons next-sibling stack)))))
157 (setq stack (nreverse stack)
160 (setq result (cons node result)
161 node (cond ((dom-node-first-child node)
162 (when (dom-node-next-sibling node)
163 (push (dom-node-next-sibling node) stack))
164 (dom-node-first-child node))
165 ((dom-node-next-sibling node))
169 (defun xpath-following-sibling-axis (node)
170 "Return the elements of the following-sibling axis.
171 The following-sibling axis contains all the following siblings of the
172 context node. If the context node is an attribute node or namespace
173 node, the following-sibling axis is empty."
174 (let ((parent (dom-node-parent-node node)))
176 (cdr (memq node (dom-node-child-nodes parent))))))
178 (defun xpath-parent-axis (node)
179 "Return the only element of the parent-axis.
180 The parent axis contains the parent of the context node, if there is
183 See `dom-node-parent'."
184 (list (dom-node-parent-node node)))
186 (defun xpath-preceding-axis (node)
187 "Return the elements of the preceding axis.
188 The preceding axis contains all nodes in the same document as the
189 context node that are before the context node in document order,
190 excluding any ancestors and excluding attribute nodes and namespace
192 ;; We don't want to call this recursively because of performance.
193 (let ((ancestors (xpath-ancestor-or-self-axis node))
196 ;; We just add the elements in document order, skipping ancestors,
197 ;; until we reach the context node.
198 (setq node (dom-document-element (dom-node-owner-document context-node)))
200 (when (not (memq node ancestors))
201 (setq result (cons node result)))
202 (setq node (cond ((dom-node-first-child node)
203 (when (dom-node-next-sibling node)
204 (push (dom-node-next-sibling node) stack))
205 (dom-node-first-child node))
206 ((dom-node-next-sibling node))
208 (when (eq node context-node)
212 (defun xpath-preceding-sibling-axis (node)
213 "Return the elements on the preceding-sibling axis.
214 The preceding-sibling axis contains all the preceding siblings of the
215 context node. If the context node is an attribute node or namespace
216 node, the preceding-sibling axis is empty."
217 (let ((parent (dom-node-parent-node node)))
219 (let ((list (dom-node-child-nodes parent))
221 (while (and list (not (eq (car list) node)))
222 (setq result (cons (car list) result)
226 ;; FIXME: Namespaces not implemented.
227 ;; The namespace axis contains the namespace nodes of the context node.
228 ;; The axis will be empty unless the context node is an element.
230 (defun xpath-self-axis (node)
231 "Return the element on the self axis.
232 The self axis contains just the context node itself."
237 (defun xpath-name-filter (nodes name)
238 "Filter NODES by NAME.
239 If NAME is \"*\", return NODES."
240 (if (string= name "*")
244 (when (string= name (dom-node-name node))
245 (setq result (cons node result))))
248 (defun xpath-text-filter (nodes)
249 "Filter NODES, retaining only text nodes."
252 (when (eq (dom-node-type node) dom-text-node)
253 (setq result (cons node result))))
256 ;; FIXME: xpath-comment-filter and xpath-processing-instruction-filter
257 ;; are not implemented, yet.
259 ;;; Node Set Functions
261 ;; For each node in the node-set to be filtered, the PredicateExpr is
262 ;; evaluated with that node as the context node, with the number of
263 ;; nodes in the node-set as the context size, and with the proximity
264 ;; position of the node in the node-set with respect to the axis as the
265 ;; context position; if PredicateExpr evaluates to true for that node,
266 ;; the node is included in the new node-set; otherwise, it is not
269 (defvar xpath-context-node)
270 (defvar xpath-context-size)
271 (defvar xpath-context-position)
273 ;; A PredicateExpr is evaluated by evaluating the Expr and converting
274 ;; the result to a boolean. If the result is a number, the result will
275 ;; be converted to true if the number is equal to the context position
276 ;; and will be converted to false otherwise; if the result is not a
277 ;; number, then the result will be converted as if by a call to the
278 ;; boolean function. Thus a location path para[3] is equivalent to
279 ;; para[position()=3].
281 ;; FIXME: Function related stuff is not implemented.
283 (defun xpath-function/last ()
284 "Return a number equal to the context size from the expression
288 (defun xpath-function/position ()
289 "Return a number equal to the context position from the expression
291 xpath-context-position)
293 (defun xpath-function/count (node-set)
294 "Return the number of nodes in NODE-SET."
297 (defun xpath-function/name (&optional node-set)
298 "Return the name of the first element in NODE-SET.
299 If optional argument NODE-SET is not given, return the name
300 of the context-node."
302 (dom-node-name (car node-set))
303 (dom-node-name xpath-context-node)))
307 (defun xpath-number (&optional obj)
308 "Return the numeric value for OBJ."
310 (setq obj xpath-context-node))
311 (cond ((and (listp obj)
312 (dom-element-p (car obj)))
313 (setq obj (xpath-string obj)))
314 ((dom-element-p obj); This is not in the spec!
315 (setq obj (xpath-string obj))))
319 (if (string-match "[^0-9.eE- \t\n\r\l]" obj)
321 (string-to-number obj)))
326 (t (error "Cannot convert %S to a string" obj))))
328 (defun xpath-string (obj)
329 "Return the string-value for OBJ.
330 This is computed as follows:
331 No computation is necessary for strings.
332 Numbers are passed to `string-to-number'.
333 nil is \"false\". t is \"true\".
334 A DOM Element is passed to `dom-element-text-content'.
335 A DOM Node List gets the string value of its first element.
336 A DOM Attribute is passed to `dom-attr-value'."
340 (string-to-number obj))
346 (dom-element-p (car obj)))
347 (dom-element-text-content (car obj)))
349 (dom-element-text-content (car obj)))
351 (dom-attr-value obj))
352 (t (error "Cannot convert %S to a string" obj))))
354 ;; A little evaluator.
355 (defun xpath-eval (expression)
356 "Evaluate EXPRESSION."
357 (if (and (listp expression)
358 (functionp (car expression)))
362 (defun xpath-equal (nodes a b)
363 "Filter nodes, retaining nodes where A and B are equal.
364 Equality is determined as follows:
365 If either A or B are booleans, compare booleans.
366 If either A or B are numbers, compare numbers.
367 Else, compare strings. See `xpath-string'."
368 ;; FIXME: Needs more work to be really compliant.
369 (let ((xpath-context-position 0)
370 (xpath-context-size (length nodes))
373 (setq xpath-context-position (1+ xpath-context-position))
374 (let* ((xpath-context-node node)
377 (when (cond ((listp a)
379 (while (and (not result) a)
380 (setq result (xpath-equal nodes (car a) b)
385 (while (and (not result) b)
386 (setq result (xpath-equal nodes a (car b))
389 ;; ((or (boolean-p a) (boolean-p b))
390 ;; ;; The following trick treats any non-nil value as t.
391 ;; (eq (not a) (not b)))
392 ((or (eq a t) (eq b t))
393 ;; The following trick treats any non-nil value as t.
394 (eq (not a) (not b)))
395 ((or (numberp a) (numberp b))
396 (= (xpath-number a) (xpath-number b)))
397 ((or (stringp a) (stringp b))
398 (string= (xpath-string a) (xpath-string b)))
401 (setq result (cons node result)))))
404 ;; Resolving an XPath
406 (defun xpath-resolve-axis (nodes func)
407 "Apply FUNC to every node in NODES and return the concatenation."
408 ;; Use append instead of nconc, because if this is the child axis, for
409 ;; example, then the list returned will be the original list of
411 (apply 'append (mapcar (lambda (node) (funcall func node))
414 (defun xpath-resolve (node xpath)
415 "Resolve XPATH relative to NODE.
416 XPATH is a string, NODE is the context node.
417 This returns a list of nodes."
418 (let ((steps (xpath-steps xpath)))
419 ;; If XPATH is an absolute location path, then the car of STEPS is
420 ;; the (uninterned) symbol in the variable
421 ;; `xpath-document-root-symbol'. In this case we must start from
423 (when (eq (car steps) xpath-document-root-symbol)
424 (setq node (dom-document-element
425 (dom-node-owner-document node))
427 (xpath-resolve-steps node steps)))
429 (defun xpath-resolve-steps (node steps)
430 "Resolve STEPS relative to NODE.
431 STEPS is a parsed XPATH.
432 See `xpath-resolve' and `xpath-steps'."
433 (let ((nodes (list node)))
435 ;; For each node, get the nodes on the axis and concatenate the result.
436 (let ((func (car step)))
437 (setq nodes (xpath-resolve-axis nodes func)))
438 ;; Apply each of the predicates.
439 (let ((predicates (cdr step))
441 (while (and nodes predicates)
442 (setq predicate (car predicates)
443 predicates (cdr predicates))
444 (let ((func (car predicate))
445 (args (cdr predicate)))
446 (setq nodes (apply func nodes args))))))
451 (defmacro xpath-assert (expr)
453 (error "Test failed: %S" ',expr)))
455 (defun xpath-test-clean-xml (obj)
456 (cond ((null obj) nil)
459 (cons (xpath-test-clean-xml (car obj))
460 (xpath-test-clean-xml (cdr obj))))
463 (if (string-match "\\`[\n\t ]+\\'" (car obj))
464 (xpath-test-clean-xml (cdr obj))
467 (string-match "\\`[\n\t ]*\\(.*\\)[\n\t ]*\\'" (car obj))
468 (match-string 1 (car obj)))
469 (xpath-test-clean-xml (cdr obj)))))
470 (t (cons (car obj) (xpath-test-clean-xml (cdr obj))))))))
473 (file-readable-p "sample.xml"))
476 (defvar xpath-test-data
477 (xpath-test-clean-xml
478 (car (xml-parse-file "sample.xml"))))
480 (defvar xpath-test-document
481 (dom-make-document-from-xml xpath-test-data))
483 ;; (defvar xpath-test-node
484 ;; (car (dom-document-get-elements-by-tag-name
485 ;; xpath-test-document
488 ;; (xpath-resolve xpath-test-node "descendant::title")
489 ;; (xpath-resolve xpath-test-node "child::bookbiblio/child::title")
490 ;; (xpath-resolve xpath-test-node "/child::chapter/child::title")
492 ;; (setq data (car (xml-parse-file "sample.xml")))
493 (let ((title (car (dom-document-get-elements-by-tag-name
496 (xpath-assert (equal (mapcar 'dom-node-name
497 (xpath-ancestor-axis title))
498 '(bookbiblio bookinfo book)))
499 (xpath-assert (equal (mapcar 'dom-node-name
500 (xpath-ancestor-or-self-axis title))
501 '(title bookbiblio bookinfo book)))
502 (xpath-assert (equal (mapcar 'dom-node-name
503 (xpath-attribute-axis (dom-document-element
504 xpath-test-document)))
506 (xpath-assert (equal (mapcar 'dom-node-name
507 (xpath-child-axis (dom-document-element
508 xpath-test-document)))
509 '(bookinfo chapter)))
510 (xpath-assert (equal (mapcar 'dom-node-name
511 (xpath-descendant-axis
512 (dom-element-last-child
513 (dom-document-element
514 xpath-test-document))))
515 '(title \#text para \#text)))
516 (xpath-assert (equal (mapcar 'dom-node-name
517 (xpath-descendant-or-self-axis
518 (dom-element-last-child
519 (dom-document-element
520 xpath-test-document))))
521 '(chapter title \#text para \#text)))
522 (xpath-assert (equal (mapcar 'dom-node-name
523 (xpath-descendant-axis
524 (dom-document-element xpath-test-document)))
525 '(bookinfo bookbiblio title \#text edition \#text
526 authorgroup author firstname \#text surname
527 \#text chapter title \#text para \#text)))
528 (xpath-assert (equal (mapcar 'dom-node-name
529 (xpath-following-axis
530 (dom-element-first-child
531 (dom-document-element
532 xpath-test-document))))
533 '(chapter title \#text para \#text)))
534 (xpath-assert (equal (mapcar 'dom-node-name (xpath-following-axis title))
535 '(edition \#text authorgroup author firstname
536 \#text surname \#text chapter title
537 \#text para \#text)))
538 (xpath-assert (equal (mapcar 'dom-node-name
539 (xpath-following-sibling-axis title))
540 '(edition authorgroup)))
541 (xpath-assert (equal (mapcar 'dom-node-name (xpath-parent-axis title))
543 (xpath-assert (equal (mapcar 'dom-node-name
544 (xpath-preceding-axis
546 (dom-document-element
547 xpath-test-document))))
548 '(\#text surname \#text firstname author
549 authorgroup \#text edition \#text title
550 bookbiblio bookinfo)))
551 (xpath-assert (equal (mapcar 'dom-node-name
552 (xpath-preceding-sibling-axis
554 (dom-document-element
555 xpath-test-document))))
558 (mapcar 'dom-node-name
559 (xpath-preceding-sibling-axis
560 (dom-node-last-child ; authorgroup
561 (dom-node-first-child ; bookbiblio
562 (dom-node-first-child ; bookinfo
563 (dom-document-element xpath-test-document)))))) ; book
565 (xpath-assert (equal (xpath-self-axis title) (list title))))
567 (let ((node-list (dom-document-get-elements-by-tag-name
568 xpath-test-document '*)))
569 (xpath-assert (equal (mapcar 'dom-node-name
570 (xpath-name-filter node-list 'title))
572 (xpath-assert (equal (mapcar 'dom-node-value
573 (xpath-text-filter node-list))
574 '("My own book!" "First" "John" "Wiegley"
575 "A very small chapter"
576 "Wonder where the content is..."))))
578 (let ((root (dom-document-element xpath-test-document)))
579 (xpath-assert (equal (mapcar 'dom-node-name
580 (xpath-resolve-axis (list root)
582 '(bookinfo chapter)))
583 (xpath-assert (equal (mapcar 'dom-node-name
585 (dom-node-child-nodes root)
587 '(bookbiblio title para)))
588 (xpath-assert (equal (mapcar 'dom-node-text-content
589 (xpath-resolve root "descendant::title"))
591 "A very small chapter")))
593 (mapcar 'dom-node-text-content
595 "descendant::chapter/child::title"))
596 '("A very small chapter"))))
598 (xpath-assert (xpath-equal '(yes) t 5))
599 (xpath-assert (not (xpath-equal '(yes) nil 5)))
600 (xpath-assert (xpath-equal '(yes) 5.0 5))
601 (xpath-assert (not (xpath-equal '(yes) 4 5)))
602 (xpath-assert (xpath-equal '(yes) "5.0" 5))
603 (xpath-assert (xpath-equal '(yes) '(+ 3 2) 5))
604 ;; What is `xpath-resolve-args'??
605 ;; Is here some problem lurking?
606 ;; (xpath-assert (equal (xpath-resolve-args '(1 (= 1 1) 3))
608 ;; (xpath-assert (equal (xpath-resolve-args '(4 (+ 3 2) 6))
610 (xpath-assert (xpath-equal '(yes) '(1 2 3) 3))
611 (xpath-assert (not (xpath-equal '(yes) '(1 2 3) 4)))
612 (xpath-assert (xpath-equal '(yes) 3 '(1 2 3)))
613 (xpath-assert (not (xpath-equal '(yes) 4 '(1 2 3))))
614 (xpath-assert (xpath-equal '(yes) '(1 2 3) '(3 4 5)))
615 (xpath-assert (not (xpath-equal '(yes) '(1 2 3) '(4 5 6))))
617 (let ((root (dom-document-element xpath-test-document)))
619 (mapcar 'dom-node-name
620 (xpath-resolve root "child::*[position()=1]"))
623 (mapcar 'dom-node-name
624 (xpath-resolve root "child::*[position()=2]"))
628 "child::*[attribute::id=\"compiler\"]"))))
630 (let ((root (dom-document-element xpath-test-document)))
635 (xpath-resolve root "self::*[attribute::id=\"compiler\"]"))
639 (let ((node (car (dom-document-get-elements-by-tag-name
640 xpath-test-document 'edition))))
642 (mapcar 'dom-node-name
643 (xpath-resolve node "/descendant::title"))
646 ;; Abbreviated syntax.
647 (let ((root (dom-document-element xpath-test-document)))
651 'dom-node-text-content
652 (xpath-resolve root "descendant::authorgroup/author/firstname"))
655 ;; (let ((root (dom-document-element xpath-test-document)))
656 ;; (xpath-resolve root "descendant::authorgroup/author/firstname[position()]"))
659 (let ((node (car (dom-document-get-elements-by-tag-name
660 xpath-test-document 'edition))))
661 (xpath-assert (equal (mapcar 'dom-node-text-content
662 (xpath-resolve node "/chapter/title"))
663 '("A very small chapter"))))
669 ;;; xpath.el ends here.