1 ;;; idn.el --- Recommended Identifier Profiles for IDN
3 ;; Author: Lennart Borgman (lennart O borgman A gmail O com)
4 ;; Created: 2010-03-24 Wed
6 ;; Last-Updated: 2010-03-26 Fri
11 ;; Features that might be required by this library:
15 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19 ;; Functions for handling IDN chars defined by
20 ;; `http://www.unicode.org/reports/tr39/'.
22 ;; See `idn-is-recommended'.
24 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31 ;; This program is free software; you can redistribute it and/or
32 ;; modify it under the terms of the GNU General Public License as
33 ;; published by the Free Software Foundation; either version 3, or
34 ;; (at your option) any later version.
36 ;; This program is distributed in the hope that it will be useful,
37 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
38 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
39 ;; General Public License for more details.
41 ;; You should have received a copy of the GNU General Public License
42 ;; along with this program; see the file COPYING. If not, write to
43 ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth
44 ;; Floor, Boston, MA 02110-1301, USA.
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
50 ;; Fix-me: You have to change this if you are not using nXhtml:
51 (require 'nxhtml-base)
(defvar uts39-datadir (expand-file-name "etc/uts39/" nxhtml-install-dir)
  "Directory holding the UTS #39 data files, for example idnchars.txt.
The default is the etc/uts39/ directory below `nxhtml-install-dir'.
Change this if you are not using nXhtml.")
;; Scan idnchars.txt (looked up in `uts39-datadir') for hexadecimal
;; code point ranges and count the code points found.
;; NOTE(review): the embedded listing numbers skip values (61 -> 64,
;; 66 -> 69, 78 -> 80, 84 -> 87), so some lines of this form are not
;; visible here.  The comments below describe only what is shown.
56 (let* ((idnchars-file (expand-file-name "idnchars.txt" uts39-datadir))
;; Remember whether some buffer was already visiting the file; this
;; decides further down whether the buffer gets killed.
57 (idnchars-old (find-buffer-visiting idnchars-file))
58 (idnchars-buf (or idnchars-old
;; NOTE(review): when the file does not exist this branch evaluates to
;; the return value of `message', not to a buffer -- confirm that the
;; code using idnchars-buf copes with that.
59 (if (not (file-exists-p idnchars-file))
60 (message "Can't find file %S" idnchars-file)
61 (find-file-noselect idnchars-file))))
;; Two groups of exactly four hex digits each; presumably the start
;; and the optional end of a range (the enclosing pattern form is not
;; visible here).
64 (group (repeat 4 (any xdigit)))
66 (group (repeat 4 (any xdigit))))))
69 (with-current-buffer idnchars-buf
;; Search forward from the top of the buffer for every match of
;; range-patt.
73 (goto-char (point-min))
74 (while (re-search-forward range-patt nil t)
;; NOTE(review): str-beg is taken from match 0 (the whole match), not
;; from group 1; this presumably relies on `string-to-number' stopping
;; at the first non-hex character -- verify.
75 (let* ((str-beg (match-string 0))
76 (str-end (match-string 2))
;; Both bounds are parsed as base 16 numbers.
77 (beg (string-to-number str-beg 16))
78 (end (or (when str-end (string-to-number str-end 16))
80 ;;(message "str-beg=%S str-end=%S" str-beg str-end)
;; Visit every code point from beg to end inclusive.
81 (dotimes (ii (1+ (- end beg)))
82 (let ((num (+ ii beg)))
83 ;;(message "setting idn-char %s #%4x" num num)
84 (setq num-idn (1+ num-idn))
;; Kill the buffer only when no buffer was visiting the file before.
87 (unless idnchars-old (kill-buffer idnchars-buf))
88 (message "Found %d IDN chars" num-idn)
;; Constant bool vector indexed by character code.
;; NOTE(review): the embedded listing numbers skip values (92 -> 94,
;; 96 -> 98, 98 -> 100); the code that fills the vector and the rest of
;; the sanity condition are not visible here.
91 (defconst idn-char-vector
;; One flag for each code 0..65535 (256 * 256 entries), all initially nil.
92 (let ((bv (make-bool-vector (* 256 256) nil)))
94 ;; (string-to-number "002D" 16)
95 ;; Make a quick sanity check:
;; The visible part of the check expects code 44 (the comma) to be unset;
;; if the whole condition fails, a message reports bad data.
96 (unless (and (not (aref bv 44))
98 (message "idn-char-vector: Bad idn data in file idnchars.txt"))
100 "Boolean vector with recommended IDN chars.")
103 ;;(idn-is-recommended 0)
104 ;;(idn-is-recommended 65535)
(defsubst idn-is-recommended (char)
  "Return t if character CHAR is a recommended IDN char.
See URL `http://www.unicode.org/reports/tr39/'.

Return nil, rather than signaling an error, when CHAR is not a
non-negative integer (for example the nil that `char-after'
returns at the end of a buffer) or when CHAR is beyond the range
of codes stored in `idn-char-vector'.

Data is initialized from the file idnchars.txt in the directory
`uts39-datadir'.  This file is fetched from the above URL."
  ;; `aref' signals an error for a non-integer or out-of-range index,
  ;; so check the index before looking it up.
  (and (natnump char)
       (< char (length idn-char-vector))
       (aref idn-char-vector char)))
114 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
115 ;;; Below are some help functions that can be commented out.
117 ;;(global-set-key [f9] 'idn-char-at-point)
(defun idn-char-at-point (&optional pos)
  "Tell if char at POS is a recommended IDN char.
POS defaults to point; when called interactively point is used.

The result is shown in the echo area and the message string is
returned.  If there is no character at POS (for example at the
end of the buffer) say so instead of signaling an error."
  (interactive "d")
  (let* ((where (or pos (point)))
         (this-char (char-after where)))
    (if (null this-char)
        ;; `char-after' returns nil when WHERE is outside the
        ;; accessible part of the buffer or at its very end.
        (message "No char at position %d" where)
      (message "IDN char at point: %s (#%000x)"
               ;; Codes beyond the vector cannot be looked up; report
               ;; them as not recommended instead of letting `aref' fail.
               (and (< this-char (length idn-char-vector))
                    (idn-is-recommended this-char))
               this-char))))
;; Write every character flagged in `idn-char-vector' into the help
;; buffer, followed by a count.
;; NOTE(review): the embedded listing numbers skip values (126 -> 128,
;; 130 -> 132, 134 -> 136, 136 -> 139, 140 -> 142, 144 -> 146), so the
;; start of the docstring, the bindings of col and cnt and some calls
;; are not visible here.
126 (defun idn-list-chars ()
128 For more info see `idn-is-recommended'.
130 Note: This may crash Emacs currently, at least on w32."
;; Output goes to the buffer returned by `help-buffer'.
132 (with-output-to-temp-buffer (help-buffer)
;; NOTE(review): `interactive-p' is obsolete in newer Emacs versions;
;; `called-interactively-p' is the documented replacement.
133 (help-setup-xref (list #'idn-list-chars) (interactive-p))
134 (with-current-buffer (help-buffer)
136 "Recommended Identifier Characters for IDN:\n\n")
;; Walk every index of the vector; set entries are listed as chars.
139 (dotimes (nn (length idn-char-vector))
140 (when (aref idn-char-vector nn)
;; col cycles through 0..19, so a line break is inserted before every
;; 20th listed character.
142 (setq col (mod (1+ col) 20))
143 (when (= col 0) (insert "\n "))
144 (insert " " (char-to-string nn))))
;; Summary line built from cnt (its update is not visible here).
146 (format "There were %d IDN chars defined in `idn-char-vector'." cnt))
150 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;