forked from github/skk-dev.dict
474 lines
14 KiB
Plaintext
474 lines
14 KiB
Plaintext
;;; ZIPCODE-MK -- SKK 郵便番号辞書作成用プログラム -*- mode: emacs-lisp; coding: japanese-shift-jis-2004; -*-
|
||
|
||
;; Copyright (C) 2000-2005 SKK Development Team
|
||
|
||
;; Maintainer: SKK Development Team <skk@ring.gr.jp>
|
||
;; Keywords: japanese, mule, input method
|
||
|
||
;; This file is part of Daredevil SKK.
|
||
|
||
;; Daredevil SKK is free software; you can redistribute it and/or
|
||
;; modify it under the terms of the GNU General Public License as
|
||
;; published by the Free Software Foundation; either version 2, or
|
||
;; (at your option) any later version.
|
||
|
||
;; Daredevil SKK is distributed in the hope that it will be useful,
|
||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
;; General Public License for more details.
|
||
|
||
;; You should have received a copy of the GNU General Public License
|
||
;; along with Daredevil SKK, see the file COPYING. If not, write to
|
||
;; the Free Software Foundation Inc., 51 Franklin St, Fifth Floor,
|
||
;; Boston, MA 02110-1301, USA.
|
||
|
||
;;; Commentary:
|
||
|
||
;; 詳細については README.ja を参照してください。
|
||
|
||
;;; Code:
|
||
|
||
(require 'time-stamp)
|
||
|
||
(set-language-environment "Japanese")
|
||
(require 'japan-util)
|
||
|
||
(defvar TEMP_ZIPCODE nil)
|
||
(defvar TEMP_OFFICE nil)
|
||
|
||
(defvar KEN_ALL nil)
|
||
(defvar JIGYOSYO nil)
|
||
|
||
(defvar ZIPCODE "SKK-JISYO.zipcode")
|
||
(defvar OFFICE "SKK-JISYO.office.zipcode")
|
||
(defvar WORDS "words.zipcode")
|
||
|
||
(defvar JISYO_HEADER
|
||
"\
|
||
;;
|
||
;; Copyright: Public domain dictionary. Share and enjoy.
|
||
;;
|
||
;; Created: 24 Jul 2000
|
||
;; Time-stamp: <>
|
||
;;
|
||
")
|
||
|
||
(let ((workfiles '((TEMP_ZIPCODE . ".zipcode")
|
||
(TEMP_OFFICE . ".office")
|
||
(KEN_ALL . "ken_all.csv")
|
||
(JIGYOSYO . "jigyosyo.csv")))
|
||
(temp-dir (copy-sequence (car (nthcdr 4 command-line-args-left))))
|
||
(src-dir (copy-sequence (car (nthcdr 5 command-line-args-left)))))
|
||
(when (stringp temp-dir)
|
||
(setq temp-dir (expand-file-name temp-dir))
|
||
(unless (file-directory-p temp-dir)
|
||
(make-directory temp-dir 'parents))
|
||
(dolist (file workfiles)
|
||
(set (car file) (expand-file-name (cdr file) temp-dir)))
|
||
(setcdr (nthcdr 3 command-line-args-left) nil)))
|
||
|
||
;; 一般郵便番号用 .. make zipcode
|
||
(defun mkdic-zipcode ()
|
||
(let (*addr3* *addr4*
|
||
*stat*)
|
||
(set-buffer (get-buffer-create " *dic *"))
|
||
(erase-buffer)
|
||
(set-buffer (get-buffer-create " *csv *"))
|
||
(erase-buffer)
|
||
|
||
(let ((large-file-warning-threshold 20000000)
|
||
(coding-system-for-read 'shift_jis-2004))
|
||
(insert-file-contents KEN_ALL))
|
||
|
||
(goto-char (point-min))
|
||
(while (not (looking-at "^[0-9]"))
|
||
(forward-line))
|
||
|
||
(mkdic-get-line)
|
||
(while (eq (forward-line) 0)
|
||
(mkdic-get-line))
|
||
|
||
(set-buffer " *dic *")
|
||
(goto-char (point-min))
|
||
(insert "\
|
||
;; okuri-ari entries.
|
||
;; okuri-nasi entries.
|
||
")
|
||
(let ((coding-system-for-write 'euc-jis-2004-unix))
|
||
(write-region (point-min) (point-max) TEMP_ZIPCODE))))
|
||
|
||
(defun mkdic-get-line ()
|
||
(let ((i 0)
|
||
zip
|
||
addr1 addr2 addr3
|
||
stat)
|
||
(while (< i 9)
|
||
(cond
|
||
((= i 2)
|
||
(forward-char 1)
|
||
(setq zip (buffer-substring (point) (+ 7 (point)))))
|
||
((= i 6)
|
||
(forward-char 1)
|
||
(setq addr1 (buffer-substring (point) (1- (search-forward "\"")))))
|
||
((= i 7)
|
||
(forward-char 1)
|
||
(setq addr2 (buffer-substring (point) (1- (search-forward "\"")))))
|
||
((= i 8)
|
||
(forward-char 1)
|
||
(setq addr3 (buffer-substring (point) (1- (search-forward "\""))))
|
||
(when (or (string-equal "以下に掲載がない場合" addr3)
|
||
(string-match ".*一円$" addr3)
|
||
(string-match ".*の次に番地がくる場合$" addr3)
|
||
(string-match "^[0-9].*[0-9]$" addr3))
|
||
(setq addr3 ""))
|
||
;;
|
||
(when (string-equal addr3 "富岡(○○屋敷)")
|
||
;; 愛知県新城市屋敷地区?
|
||
;; 愛知県新城市屋敷は別番号の模様
|
||
(setq addr3 "富岡(大屋敷、中屋敷、東屋敷、西屋敷)"))
|
||
(when (string-match "(" addr3)
|
||
(let ((start (match-beginning 0)))
|
||
(cond
|
||
((and (string-match "階" addr3 start)
|
||
(not (string-match "(地階・階層不明)" addr3 start)))
|
||
(setq addr3
|
||
(if (and (> start 0)
|
||
(save-match-data
|
||
(string-match "[0-9]" (substring addr3
|
||
(1- start)
|
||
start))))
|
||
;; "サンシャイン60 1階" など
|
||
(concat (substring addr3 0 start)
|
||
" "
|
||
(substring addr3 (1+ start) (match-end 0)))
|
||
(concat (substring addr3 0 start)
|
||
(substring addr3 (1+ start) (match-end 0))))))
|
||
;;
|
||
((and (string-equal addr1 "京都府")
|
||
(string-match "^京都市" addr2))
|
||
(setq *addr4* (substring addr3 0 start))
|
||
(setq *addr3* (substring addr3 start))
|
||
(if (string-match ")$" *addr3*)
|
||
(progn
|
||
(setq addr3 (mkdic-process-kyoto *addr3* *addr4*))
|
||
(setq *stat* nil)
|
||
(setq *addr4* nil))
|
||
(setq addr3 nil)
|
||
(setq *stat* t)))
|
||
;;
|
||
((and (string-match ")" addr3)
|
||
(not (string-match
|
||
"地割\\|を除く\\|を含む\\|全域\\|[ア-ン]、[ア-ン]"
|
||
addr3)))
|
||
(setq *addr4* (substring addr3 0 start))
|
||
(setq *addr3* (substring addr3 start))
|
||
(when (string-equal *addr4* "甲、乙")
|
||
(setq *addr4* ""))
|
||
(if (and (string-match ".+)$" *addr3*)
|
||
(not (string-match "「\\|」\\|~\\|[0-9]" *addr3*)))
|
||
(progn
|
||
(setq addr3 (mkdic-process-kakkonai *addr3* *addr4*))
|
||
(setq *stat* nil)
|
||
(setq *addr4* nil))
|
||
(setq *addr4* nil)
|
||
(setq *addr3* nil)
|
||
(setq addr3 (substring addr3 0 start))
|
||
(setq *stat* t)))
|
||
;;
|
||
(t
|
||
(setq addr3 (substring addr3 0 start))
|
||
(setq *addr3* nil)
|
||
(setq *stat* t)))))
|
||
;;
|
||
(when (and addr3 (string-match ".*地割$" addr3))
|
||
(cond
|
||
((string-match "、" addr3)
|
||
(let ((start (match-beginning 0)))
|
||
(setq addr3 (concat (substring addr3 0 start)
|
||
"/" addr1 addr2
|
||
(substring addr3 (1+ start))))))
|
||
((string-match "~" addr3)
|
||
(let ((point (match-beginning 0))
|
||
fromstr tostr
|
||
from to
|
||
chimei str
|
||
pt1 pt2)
|
||
(setq fromstr (japanese-hankaku (substring addr3 0 point)))
|
||
(setq tostr (japanese-hankaku (substring addr3 (1+ point))))
|
||
(setq chimei (substring fromstr 0
|
||
(string-match "[0-9]" fromstr)))
|
||
(setq pt1 (match-beginning 0))
|
||
(when (string-match "地割$" fromstr)
|
||
(setq pt2 (match-beginning 0)))
|
||
(setq from (string-to-number (substring fromstr pt1 pt2)))
|
||
;;
|
||
(when (string-match "[0-9]" tostr)
|
||
(setq pt1 (match-beginning 0)))
|
||
(when (string-match "地割$" tostr)
|
||
(setq pt2 (match-beginning 0)))
|
||
(setq to (string-to-number (substring tostr pt1 pt2)))
|
||
;;
|
||
(let ((i from))
|
||
(while (<= i to)
|
||
(cond
|
||
((= i from)
|
||
(setq addr3 (concat chimei
|
||
(japanese-zenkaku (format "%d" i))
|
||
"地割")))
|
||
(t
|
||
(setq addr3 (concat addr3 "/" addr1 addr2 chimei
|
||
(japanese-zenkaku (format "%d" i))
|
||
"地割"))))
|
||
(setq i (1+ i))))))))
|
||
;;
|
||
(when (and addr3 (string-match ")$" addr3))
|
||
(cond
|
||
((and *addr4* *addr3*)
|
||
(setq *addr3* (concat *addr3* addr3))
|
||
(setq addr3 (mkdic-process-kyoto *addr3* *addr4*))
|
||
(setq *stat* nil)
|
||
(setq *addr4* nil))
|
||
((and *addr3* (setq addr3 *addr3*))
|
||
(setq *stat* nil))))
|
||
;;
|
||
(when (member addr3 '("岩田町居村、北郷中"
|
||
"岩田町宮下、道合"
|
||
"飯村町西山、高山"))
|
||
;; いずれも愛知県豊橋市
|
||
(setq addr3 (concat (substring addr3 0
|
||
(progn
|
||
(string-match "、" addr3)
|
||
(match-beginning 0)))
|
||
"/" addr1 addr2
|
||
(substring addr3 0
|
||
(progn
|
||
(string-match "町" addr3)
|
||
(match-end 0)))
|
||
(substring addr3
|
||
(progn
|
||
(string-match "、" addr3)
|
||
(1+ (match-end 0)))))))
|
||
;;
|
||
(cond
|
||
((and *stat* *addr4* *addr3* addr3)
|
||
(setq *addr3* (concat *addr3* addr3))
|
||
(setq addr3 nil))
|
||
((and addr3 (string-match "、" addr3))
|
||
(if *stat*
|
||
(when *addr3*
|
||
(setq addr3 *addr3*))
|
||
(setq addr3 "")))
|
||
(t nil)))
|
||
(t nil))
|
||
;;
|
||
(let ((search (search-forward "," nil t)))
|
||
(if search
|
||
(setq i (1+ i))
|
||
(setq i 9))))
|
||
;;
|
||
(cond
|
||
((and *stat* addr3)
|
||
(setq *addr3* addr3))
|
||
((not *addr4*)
|
||
(setq *addr3* nil)))
|
||
;;
|
||
(with-current-buffer " *dic *"
|
||
(when (and zip addr1 addr2 addr3)
|
||
(insert zip " /" addr1 addr2 addr3 "/\n")))))
|
||
|
||
(defun mkdic-process-kyoto (nantaras cho)
|
||
(let (addr)
|
||
(cond
|
||
((string-match "\\(~\\|(丁目)\\|その他\\|番地)$\\)"
|
||
nantaras)
|
||
(setq nantaras nil))
|
||
|
||
((string-match "([0-9]丁目)" nantaras)
|
||
(setq cho (concat cho (substring nantaras 1 (1- (length nantaras)))))
|
||
(setq nantaras nil))
|
||
|
||
(t
|
||
(setq nantaras (split-string (substring nantaras 1
|
||
(1- (length nantaras)))
|
||
"、"))))
|
||
|
||
(cond
|
||
((not nantaras)
|
||
(setq addr cho))
|
||
|
||
(t
|
||
(setq addr (concat (car nantaras) cho))
|
||
(dolist (nantara (cdr nantaras))
|
||
(setq addr (concat addr "/" addr1 addr2 nantara cho)))
|
||
addr))))
|
||
|
||
(defun mkdic-process-kakkonai (detail cho)
|
||
(let (addr)
|
||
(cond
|
||
((string-match "\\(~\\|(丁目)\\|番地)$\\)"
|
||
detail)
|
||
(setq detail nil))
|
||
((string-match "([0-9]丁目)" detail)
|
||
(setq cho (concat cho (substring detail 1 (1- (length detail)))))
|
||
(setq detail nil))
|
||
((string-match "(地階・階層不明)" detail)
|
||
(setq detail (list "地階")))
|
||
(t
|
||
(setq detail (split-string (substring detail 1 (1- (length detail)))
|
||
"、"))))
|
||
(cond
|
||
((not detail)
|
||
(setq addr cho))
|
||
(t
|
||
(unless (or (member "" detail)
|
||
(memq nil detail))
|
||
(setq detail (cons "" detail)))
|
||
(setq addr (concat cho (car detail)))
|
||
(dolist (nantara (cdr detail))
|
||
(unless (string-match "その他" nantara)
|
||
(setq addr (concat addr "/" addr1 addr2 cho nantara))))
|
||
addr))))
|
||
|
||
;; 事業所用 .. make office
|
||
(defun mkdic-office ()
|
||
(let (*addr3* *addr4* *stat*)
|
||
(set-buffer (get-buffer-create " *dic *"))
|
||
(erase-buffer)
|
||
|
||
(set-buffer (get-buffer-create " *csv *"))
|
||
(erase-buffer)
|
||
|
||
(let ((coding-system-for-read 'binary))
|
||
(insert-file-contents JIGYOSYO))
|
||
|
||
;; workaround 2007-01-16
|
||
;; IBM 拡張文字 fab1 を`崎'(8de8)へ置換
|
||
;; (format "%x-%x" #o372 #o261) => "fa-b1"
|
||
(goto-char (point-min))
|
||
(while (search-forward (decode-coding-string "\372\261" 'japanese-cp932) nil t)
|
||
(replace-match "")
|
||
(insert (decode-coding-string "\215\350" 'sjis)))
|
||
|
||
;; workaround 2007-02-12
|
||
;; IBM 拡張文字 fadc を `しょう'(ひらがな3文字)へ置換
|
||
(goto-char (point-min))
|
||
(while (search-forward (decode-coding-string "\372\334" 'japanese-cp932) nil t)
|
||
(replace-match "")
|
||
(insert (decode-coding-string "\202\265\202\345\202\244" 'sjis)))
|
||
|
||
;; workaround 2007-05-11
|
||
;; fa 9c を `塚' へ置換
|
||
(goto-char (point-min))
|
||
(while (search-forward (decode-coding-string "\372\234" 'japanese-cp932) nil t)
|
||
(replace-match "")
|
||
(insert (decode-coding-string "\222\313" 'sjis)))
|
||
|
||
;; workaround 2011-06-09
|
||
;; ハシゴ高(IBM 拡張文字 fbfc)を `高' へ置換
|
||
(goto-char (point-min))
|
||
(while (search-forward (decode-coding-string "\373\374" 'japanese-cp932) nil t)
|
||
(replace-match "")
|
||
(insert (decode-coding-string "\215\202" 'sjis)))
|
||
|
||
(decode-coding-region (point-min) (point-max) 'shift_jis-2004)
|
||
(goto-char (point-min))
|
||
(while (not (looking-at "^[0-9]"))
|
||
(forward-line))
|
||
|
||
(mkdic-office-get-line)
|
||
(while (eq (forward-line) 0)
|
||
(mkdic-office-get-line))
|
||
|
||
(set-buffer " *dic *")
|
||
(goto-char (point-min))
|
||
(insert "\
|
||
;; okuri-ari entries.
|
||
;; okuri-nasi entries.
|
||
")
|
||
(let ((coding-system-for-write 'euc-jis-2004-unix))
|
||
(write-region (point-min) (point-max) TEMP_OFFICE))))
|
||
|
||
(defun mkdic-office-get-line ()
|
||
(let ((i 0)
|
||
zip name
|
||
addr1 addr2 addr3 addr4)
|
||
(while (< i 9)
|
||
(cond ((= i 7)
|
||
(forward-char 1)
|
||
(setq zip (buffer-substring (point) (+ 7 (point)))))
|
||
((= i 2)
|
||
(forward-char 1)
|
||
(setq name (buffer-substring (point) (1- (search-forward "\"")))))
|
||
((= i 3)
|
||
(forward-char 1)
|
||
(setq addr1 (buffer-substring (point) (1- (search-forward "\"")))))
|
||
((= i 4)
|
||
(forward-char 1)
|
||
(setq addr2 (buffer-substring (point) (1- (search-forward "\"")))))
|
||
((= i 5)
|
||
(forward-char 1)
|
||
(setq addr3 (buffer-substring (point) (1- (search-forward "\"")))))
|
||
((= i 6)
|
||
(forward-char 1)
|
||
(setq addr4 (buffer-substring (point) (1- (search-forward "\""))))))
|
||
(let ((search (search-forward "," nil t)))
|
||
(setq i (if search
|
||
(1+ i)
|
||
9))))
|
||
|
||
(with-current-buffer " *dic *"
|
||
(when (and zip name addr1 addr2 addr3 addr4)
|
||
(insert zip " /" name " @ " addr1 addr2 addr3 addr4 "/\n")))))
|
||
|
||
(defun mkdic-words ()
|
||
(let ((dics '("SKK-JISYO.office.zipcode"
|
||
"SKK-JISYO.zipcode"))
|
||
str)
|
||
(set-buffer (get-buffer-create " *words *"))
|
||
(erase-buffer)
|
||
;;
|
||
(set-buffer (get-buffer-create " *dic *"))
|
||
;;
|
||
(dolist (dic dics)
|
||
(erase-buffer)
|
||
(insert-file-contents dic)
|
||
(goto-char (point-min))
|
||
(while (re-search-forward "^[0-9][0-9][0-9][0-9][0-9][0-9][0-9] " nil t)
|
||
(setq str (buffer-substring (match-beginning 0) (1- (match-end 0))))
|
||
(with-current-buffer (get-buffer " *words *")
|
||
(goto-char (point-max))
|
||
(insert (format "%s\n" str)))))
|
||
;;
|
||
(set-buffer (get-buffer " *words *"))
|
||
(sort-lines nil (point-min) (point-max))
|
||
(set-buffer-file-coding-system 'raw-text-unix)
|
||
(write-region (point-min) (point-max) WORDS)))
|
||
|
||
(defun mkdic-zipcode-header ()
|
||
(with-temp-buffer
|
||
(insert "\
|
||
;; -*- coding: euc-jis-2004; -*-
|
||
;; SKK-JISYO.zipcode --- 7-digit ZIP code dictionary for SKK
|
||
" JISYO_HEADER)
|
||
(let ((time-stamp-format "%02d %03b %y")
|
||
(time-stamp-time-zone "GMT")
|
||
(system-time-locale "C"))
|
||
(time-stamp))
|
||
(set-buffer-file-coding-system 'euc-jis-2004-unix)
|
||
(write-region (point-min) (point-max) ZIPCODE)))
|
||
|
||
(defun mkdic-office-header ()
|
||
(with-temp-buffer
|
||
(insert "\
|
||
;; -*- coding: euc-jis-2004; -*-
|
||
;; SKK-JISYO.office.zipcode --- 7-digit ZIP code (offices) dictionary for SKK
|
||
" JISYO_HEADER)
|
||
(let ((time-stamp-format "%02d %03b %y")
|
||
(time-stamp-time-zone "GMT")
|
||
(system-time-locale "C"))
|
||
(time-stamp))
|
||
(set-buffer-file-coding-system 'euc-jis-2004-unix)
|
||
(write-region (point-min) (point-max) OFFICE)))
|
||
|
||
;; ZIPCODE-MK ends here
|