html2mml.l -- FrameMaker support for HTML

Dan Connolly (connolly@pixel.convex.com)
Fri, 24 Jul 92 00:18:34 CDT


--cut-here

Here's a lisp program I cooked up to convert HTML files
to Frame's Maker Markup Language. It uses a real SGML
parser and a lisp interpreter, so you'll have to build
those if you don't have them handy.

I haven't tested it extensively, and I don't have a
down translator (MIF to HTML) yet... but I thought
I'd pass it along.

Oh... it assumes the HTML file conforms to the DTD
I sent out a while ago (I'd give you a WWW HREF/URL,
but the server's down right now and I can't find
it.)

Dan

--cut-here

;;; html2mml.l -- translate HyperText Markup Language to Maker Markup Language.
;;;
;;; USE
;;; sgmls file.html | xlisp html2mml.l >file.mml
;;;
;;; Where xlisp is Tom Almy's improved release of David Betz's XLISP 2.1,
;;; available in export.lcs.mit.edu:/contrib/winterp/xlisp/xlisp-2.1.almy.tar.Z
;;; and sgmls is built from
;;; ifi.uio.no:/pub/SGML/SGMLS/sgmls-0.8.tar
;;; aka
;;; ftp.uu.net:/pub/text-processing/sgml/sgmls-0.8.tar.Z
;;;
;;; The resulting file will have the OS Banner from XLisp at the
;;; top. For some reason, XLisp writes everything to stdout.
;;; I patched it to write diagnostic output to stderr. I'll have
;;; to get the patches incorporated soon.
;;;
;;; Anyway, just edit the banner out so the first line of the file is
;;; <MML ...>
;;;
;;; Then import the mml file to FrameMaker.
;;;

(setq *tracenable* t)
(setq *breakenable* t)

(princ "<MML \"from html2mml.l by connolly@convex.com\">\n")
(setq *para-tags*
'(title h1 h2 h3 h4 h5 body ol ul dl menu dir xmp listing))
(setq *literal-tags* '(xmp listing))

(setq *style-sheet* "
<!DefinePar Title
<Alignment r>
<plain> <pts 18> <bold>
>
<!DefinePar H1
<Alignment c>
<plain><pts 18>
<SpaceBefore 12pt><SpaceAfter 12pt>
>
<!DefinePar H2
<LeftIndent 0in><FirstIndent 0in>
<SpaceBefore 12pt><SpaceAfter 6pt>
<Alignment l>
<plain><pts 14>
>
<!DefinePar H3
<plain> <bold>
<LeftIndent 0.25in><FirstIndent 0.25in>
<SpaceBefore 6pt><SpaceAfter 3pt>
<Alignment l><pts 12>
>
<!DefinePar H4
<Alignment l>
>
<!DefinePar H5
<Alignment l>
>
<!DefinePar BODY
<LeftIndent 0.75in><FirstIndent 0.75in>
<SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
<Alignment l><plain><pts 12>
>
<!DefinePar OL
<FirstIndent 1.0in> <LeftIndent 1.5in>
<TabStops <TabStop 1.25in>>
<SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
<AutoNumber Yes> <NumberFormat \"<n+>\t\">
<Alignment l><plain><pts 12>
>
<!DefinePar UL
<SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
<FirstIndent 1.0in> <LeftIndent 1.5in>
<TabStops <TabStop 1.25in>>
<AutoNumber Yes> <NumberFormat \"o\\t\">
<Alignment l><plain><pts 12>
>
<!DefinePar DL
<AutoNumber No>
<SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
<FirstIndent 0.5in> <LeftIndent 2.5in>
<TabStops <TabStop 2.5in>>
<Alignment l><plain><pts 12>
>
<!DefinePar MENU
<AutoNumber No>
<WithNext yes><WithPrev yes>
<Alignment l><plain><pts 12>
>
<!DefinePar DIR
<AutoNumber No>
<Alignment l><plain><pts 12>
>
<!DefinePar XMP
<AutoNumber No>
<FirstIndent 0in> <LeftIndent 0in>
<Alignment l><plain>
<Family Courier><pts 9>
<SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
>
<!DefinePar LISTING
<AutoNumber No>
<FirstIndent 0in> <LeftIndent 0in>
<Alignment l><plain>
<Family Courier><pts 8>
<SpaceBefore 3pt><SpaceAfter 3pt><BlockSize 3>
>
")

(princ *style-sheet*)

;; From almy2.1
;; push and pop treat variable v as a stack
(defmacro push (v l)
`(setf ,l (cons ,v ,l)))

(defmacro pop (l)
`(prog1 (first ,l) (setf ,l (rest ,l))))

(defun start-para (stream tag)
(format stream "<~A>~%" tag)
)

(defun end-para (stream)
(format stream "~%~%")
)

(defun convert-data (literal)
(do ((c (read-char) (read-char))
d1 d2 d3
)
((eq c #\Newline) nil)

(cond ((eq c #\\)
(cond ((setf d1 (digit-char-p (setf c (read-char))))
(setf d2 (digit-char-p (read-char)))
(setf d3 (digit-char-p (read-char)))
(princ (int-char (+ d3 (* 8 (+ d2 (* 8 d1))))))
)
((eq c #\\) (princ "\\\\"))
((eq c #\n) (format t (if literal "<HardReturn>" " ")))
((eq c #\|) ;;nothing
)
((eq c #\s) (princ " "))
) )
((member c '(#\< #\>)) (format t "\\~A" c))
((eq c #\space) (format t (if literal "<HardSpace>" " ")))
((eql c 7) (format t "<tab>"))
(t (princ c))
) ) )

(defun html2mml ()
(do ((c (read-char) (read-char))
stack
tag
attrs
)
((null c)) ;; quit at end of file

(case c
(#\Newline ;; do nothing
)

(#\( (let ((gi (read))
)
;; open tag
(push gi stack)
(cond ((member gi *para-tags*)
(setq tag gi)
(start-para t tag)
)
((eq gi 'a)
(let ((href (second (assoc 'href attrs)))
)
;; watch out for >'s and 's
(format t "<Marker <MType 8> <MText `message www ~A'>><italic>" href)
)
)
)
(setq attrs nil)
))
(#\) (let ((gi (read))
)
(pop stack)
(cond ((member gi *para-tags*)
(setq tag nil))
((eq gi 'a) (format t "<noitalic>"))
((eq gi 'dt) (format t "<tab>"))
((member gi '(p dd li)) (format t "<par>"))
)
))

(#\-
(unless tag
(end-para t)
(dolist (gi stack)
(when (member gi *para-tags*)
(setq tag gi)
(return)
) )
(start-para t tag)
)
(convert-data (member tag *literal-tags*))
)

(#\& (let ((name (read))
)
;; name
))

(#\? (let ((pi (read-line))
)
;; processing instruction
))
(#\A (let ((name (read))
(token (read))
)
(case token
(IMPLIED ;; nothing
)
(CDATA (let ((data (read-line))
)
(push (list name data) attrs)
))
(TOKEN (let ((tokens (read-line)) ;;@@ read tokens til \n
)
;; tokens
))
(NOTATION (let ((name (read))
)
;; notation
))
(ENTITY (let ((name (read))
)
;; general entity
))
(ID (let ((id (read))
)
;; id
))
(IDREF (let ((ids (read-line)) ;; @@ read ids til \n
)
;; id's
))
) ) )

(#\D (read-line) ;; do like A but for external data name
)
) )
)

(html2mml)

--cut-here--