#!/bin/sh
# **shocco** is a quick-and-dirty, literate-programming-style documentation
# generator written for and in __POSIX shell__. It borrows liberally from
# [Docco][do], the original Q&D literate-programming-style doc generator.
#
# `shocco(1)` reads shell scripts and produces annotated source documentation
# in HTML format. Comments are formatted with Markdown and presented
# alongside syntax highlighted code so as to give an annotation effect. This
# page is the result of running `shocco` against [its own source file][sh].
#
# shocco is built with `make(1)` and installs under `/usr/local` by default:
#
#     git clone git://github.com/rtomayko/shocco.git
#     cd shocco
#     make
#     sudo make install
#     # or just copy 'shocco' wherever you need it
#
# Once installed, the `shocco` program can be used to generate documentation
# for a shell script:
#
#     shocco shocco.sh
#
# The generated HTML is written to `stdout`.
#
# [do]: http://jashkenas.github.com/docco/
# [sh]: https://github.com/rtomayko/shocco/blob/master/shocco.sh#commit

# Usage and Prerequisites
# -----------------------

# The most important line in any shell program.
set -e

# There are many ways to do usage messages in shell scripts.
# This is my favorite: you write the usage message in a comment --
# typically right after the shebang line -- *BUT*, use a special comment prefix
# like `#/` so that it's easy to pull these lines out.
#
# This also illustrates one of shocco's corner features. Only comment lines
# padded with a space are considered documentation. A `#` followed by any
# other character is considered code.
#
#/ Usage: shocco [-t <title>] [<source>]
#/ Create literate-programming-style documentation for shell scripts.
#/
#/ The shocco program reads a shell script from <source> and writes
#/ generated documentation in HTML format to stdout. When <source> is
#/ '-' or not specified, shocco reads from stdin.
# This is the second part of the usage message technique: `grep` yourself
# for the usage message comment prefix and then cut off the first few
# characters so that everything lines up.
expr -- "$*" : ".*--help" >/dev/null && {
    grep '^#/' <"$0" | cut -c4-
    exit 0
}

# A custom title may be specified with the `-t` option. We use the filename
# as the title if none is given.
test "$1" = '-t' && {
    title="$2"
    shift 2
}

# Next argument should be the `<source>` file. Grab it, and use its basename
# as the title if none was given with the `-t` option. When no `<source>` is
# given we default to `-` so that `cat(1)` reads from stdin, as the usage
# message promises (an empty file name would make `cat` fail instead).
file="${1:--}"
: "${title:=$(basename "$file")}"

# These are replaced with the full paths to real utilities by the
# configure/make system.
MARKDOWN='/usr/bin/markdown_py'
PYGMENTIZE='/usr/bin/pygmentize'

# On GNU systems, csplit doesn't elide empty files by default:
if csplit --version 2>/dev/null | grep -i gnu >/dev/null
then CSPLITARGS="--elide-empty-files"
else CSPLITARGS=
fi

# We're going to need a `markdown` command to run comments through. This can
# be [Gruber's `Markdown.pl`][md] (included in the shocco distribution) or
# [Discount][ds]'s super fast `markdown(1)` in C. Try to figure out if either
# are available and then bail if we can't find anything.
#
# The formatting pipeline runs `$MARKDOWN` (unquoted, so it may hold a command
# plus arguments); when the configured program is missing we therefore point
# `$MARKDOWN` itself at the fallback rather than defining an alias that the
# pipeline would never invoke.
#
# [md]: http://daringfireball.net/projects/markdown/
# [ds]: http://www.pell.portland.or.us/~orc/Code/discount/
command -v "$MARKDOWN" >/dev/null || {
    if command -v Markdown.pl >/dev/null
    then
        MARKDOWN='Markdown.pl'
    elif test -f "$(dirname "$0")/Markdown.pl"
    then
        MARKDOWN="perl $(dirname "$0")/Markdown.pl"
    else
        echo "$(basename "$0"): markdown command not found." 1>&2
        exit 1
    fi
}

# Check that [Pygments][py] is installed for syntax highlighting.
#
# This is a fairly hefty prerequisite. Eventually, I'd like to fallback
# on a simple non-highlighting preformatter when Pygments isn't available. For
# now, just bail out if we can't find the `pygmentize` program.
#
# [py]: http://pygments.org/
command -v "$PYGMENTIZE" >/dev/null || {
    echo "$(basename "$0"): pygmentize command not found." 1>&2
    exit 1
}

# Work and Cleanup
# ----------------

# Make sure we have a `TMPDIR` set. The `:=` parameter expansion assigns
# the value if `TMPDIR` is unset or null.
: "${TMPDIR:=/tmp}"

# Create a temporary directory for doing work. Use `mktemp(1)` if
# available; but, since `mktemp(1)` is not POSIX specified, fallback on naive
# (and insecure) temp dir generation using the program's basename and pid.
: "${WORK:=$(
      if command -v mktemp 1>/dev/null 2>&1
      then
          mktemp -d "$TMPDIR/$(basename "$0").XXXXXXXXXX"
      else
          dir="$TMPDIR/$(basename "$0").$$"
          mkdir "$dir"
          echo "$dir"
      fi
  )}"

# We want to be absolutely sure we're not going to do something stupid like
# use `.` or `/` as a work dir. Better safe than sorry.
case "$WORK" in
''|/|.)
    echo "$(basename "$0"): could not create a temp work dir." 1>&2
    exit 1
    ;;
esac

# We're about to create a ton of files under our `$WORK` directory. Register
# an `EXIT` trap that cleans everything up. This guarantees we don't leave
# anything hanging around unless we're killed with a `SIGKILL`. The trap body
# is single-quoted so `$WORK` is expanded (and quoted) when the trap runs.
trap 'rm -rf "$WORK"' 0

# Preformatting
# -------------
#
# Start out by applying some light preformatting to the `<source>` file to
# make the code and doc formatting phases a bit easier. The result of this
# pipeline is written to a temp file under the `$WORK` directory so we can
# take a few passes over it.

# Get a pipeline going with the `<source>` data. We write a single blank
# line at the end of the file to make sure we have an equal number of
# code/comment pairs.
#
# Folding.el support: turn {{{ folds }}} into titles -jrml. The `awk` stage
# additionally emits a `###` heading comment before every line ending in
# `() {`, using the first word of that line as the heading text.
(cat "$file"                                                    \
    | sed -e 's/^# {{{/# #/' -e 's/^# }}}.*/# --------------/'  \
    | awk '/\(\) {$/ { print "# ### " $1 } {print $0}'          \
    && printf "\n\n# \n\n")                                     |

# We want the shebang line and any code preceding the first comment to
# appear as the first code block. This inverts the normal flow of things.
# Usually, we have comment text followed by code; in this case, we have
# code followed by comment text.
#
# Read the first code and docs headers and flip them so the first docs block
# comes before the first code block.
(
    lineno=0
    codebuf=;codehead=
    docsbuf=;docshead=
    # Note: `read` without `IFS=` trims leading and trailing whitespace from
    # each line consumed by this loop.
    while read -r line
    do
        # Issue a warning if the first line of the script is not a shebang
        # line. This can screw things up and wreck our attempt at
        # flip-flopping the two headings.
        lineno=$(( lineno + 1 ))
        if test "$lineno" = 1
        then
            case "$line" in
            '#!'*) ;;
            *)  echo "$(basename "$0"): $file:1 [warn] shebang! line missing." 1>&2 ;;
            esac
        fi

        # Accumulate comment lines into `$docsbuf` and code lines into
        # `$codebuf`. Only lines matching `/#(?: |$)/` are considered doc
        # lines. A `case` pattern is used instead of `expr` so that lines
        # which look like `expr` operators (a lone `(`, for instance) are
        # classified quietly as code.
        case "$line" in
        '# '*|'#')
            docsbuf="$docsbuf$line
"
            ;;
        *)
            codebuf="$codebuf$line
"
            ;;
        esac

        # If we have stuff in both `$docsbuf` and `$codebuf`, it means
        # we're at some kind of boundary. If `$codehead` isn't set, we're at
        # the first comment/doc line, so store the buffer to `$codehead` and
        # keep going. If `$codehead` *is* set, we've crossed into another code
        # block and are ready to output both blocks and then straight pipe
        # everything by `exec`'ing `cat`.
        if test -n "$docsbuf" && test -n "$codebuf"
        then
            if test -n "$codehead"
            then
                docshead="$docsbuf"
                docsbuf=""
                printf "%s" "$docshead"
                printf "%s" "$codehead"
                printf '%s\n' "$line"
                exec cat
            else
                codehead="$codebuf"
                codebuf=
            fi
        fi
    done

    # We made it to the end of the file without a single comment line, or
    # there was only a single comment block ending the file. Output our
    # docsbuf or a fake comment and then the codebuf or codehead.
    printf '%s\n' "${docsbuf:-#}"
    printf '%s\n' "${codebuf:-"$codehead"}"
)                                                               |

# Remove comment leader text from all comment lines. Then prefix all
# comment lines with "DOCS" and interpreted / code lines with "CODE".
# The stream text might look like this after moving through the `sed`
# filters:
#
#     CODE #!/bin/sh
#     CODE #/ Usage: shocco <file>
#     DOCS Docco for and in POSIX shell.
#     CODE
#     CODE PATH="/bin:/usr/bin"
#     CODE
#     DOCS Start by numbering all lines in the input file...
#     ...
#
# Once we pass through `sed`, save this off in our work directory so
# we can take a few passes over it.
sed -n '
    s/^/:/
    s/^:[ ]\{0,\}# /DOCS /p
    s/^:[ ]\{0,\}#$/DOCS /p
    s/^:/CODE /p
' > "$WORK/raw"

# Now that we've read and formatted our input file for further parsing,
# change into the work directory. The program will finish up in there.
cd "$WORK"

# First Pass: Comment Formatting
# ------------------------------

# Start a pipeline going on our preformatted input.
# Replace all CODE lines with entirely blank lines. We're not interested
# in code right now, other than knowing where comments end and code begins
# and code begins and comments end.
sed 's/^CODE.*//' < raw |

# Now squeeze multiple blank lines into a single blank line.
#
# __TODO:__ `cat -s` is not POSIX and doesn't squeeze lines on BSD. Use
# the sed line squeezing code mentioned in the POSIX `cat(1)` manual page
# instead.
cat -s |

# At this point in the pipeline, our stream text looks something like this:
#
#     DOCS Now that we've read and formatted ...
#     DOCS change into the work directory. The rest ...
#     DOCS in there.
#
#     DOCS First Pass: Comment Formatting
#     DOCS ------------------------------
#
# Blank lines represent code segments. We want to replace all blank lines
# with a dividing marker and remove the "DOCS" prefix from docs lines.
sed '
    s/^$/##### DIVIDER/
    s/^DOCS //' |

# The current stream text is suitable for input to `markdown(1)`. It takes
# our doc text with embedded `DIVIDER`s and outputs HTML. `$MARKDOWN` is left
# unquoted on purpose so that it may hold a command followed by arguments.
$MARKDOWN |

# Now this is where things start to get a little crazy. We use `csplit(1)` to
# split the HTML into a bunch of individual files. The files are named
# as `docs0000`, `docs0001`, `docs0002`, ... Each file includes a single
# doc *section*. These files will sit here while we take a similar pass over
# the source code.
(
    csplit -sk                               \
           $CSPLITARGS                       \
           -f docs                           \
           -n 4                              \
           - '/<h5>DIVIDER<\/h5>/' '{9999}'  \
           2>/dev/null                      ||
    true
)

# Second Pass: Code Formatting
# ----------------------------
#
# This is exactly like the first pass but we're focusing on code instead of
# comments. We use the same basic technique to separate the two and isolate
# the code blocks.

# Get another pipeline going on our preformatted input file.
# Replace DOCS lines with blank lines.
sed 's/^DOCS.*//' < raw |

# Squeeze multiple blank lines into a single blank line.
cat -s |

# Replace blank lines with a `DIVIDER` marker and remove prefix
# from `CODE` lines.
sed '
    s/^$/# DIVIDER/
    s/^CODE //' |

# Now pass the code through `pygmentize` for syntax highlighting. We tell it
# the input is `sh` and that we want HTML output.
$PYGMENTIZE -l sh -f html -O encoding=utf8 |

# Post filter the pygments output to remove partial `<pre>` blocks. We add
# these back in at each section when we build the output document.
sed '
    s/<div class="highlight"><pre>//
    s/^<\/pre><\/div>//' |

# Again with the `csplit(1)`. Each code section is written to a separate
# file, this time with a `codeXXX` prefix. There should be the same number
# of `codeXXX` files as there are `docsXXX` files.
(
    DIVIDER='/<span class="c"># DIVIDER</span>/'
    csplit -sk                   \
           $CSPLITARGS           \
           -f code               \
           -n 4 -                \
           "$DIVIDER" '{9999}'   \
           2>/dev/null          ||
    true
)

# At this point, we have separate files for each docs section and separate
# files for each code section.

# HTML Template
# -------------

# Create a function for applying the standard [Docco][do] HTML layout, using
# [jashkenas][ja]'s gorgeous CSS for styles. Wrapping the layout in a function
# lets us apply it elsewhere simply by piping in a body.
#
# [ja]: http://github.com/jashkenas/
# [do]: http://jashkenas.github.com/docco/
#
# `layout TITLE` reads an HTML body on stdin and writes the complete page to
# stdout. `TITLE` is used for both the `<title>` element and the page heading.
#
# NOTE(review): everything in this template after `<title>$1` had its markup
# stripped in the reviewed copy of this file. It has been reconstructed to
# match the table row/cell fragments used by the recombining step below;
# confirm the stylesheet URL and element attributes against the upstream
# template.
layout () {
    cat <<HTML
<!DOCTYPE html>
<html>
<head>
    <meta http-equiv='content-type' content='text/html;charset=utf-8'>
    <title>$1</title>
    <link rel=stylesheet href="http://jashkenas.github.com/docco/resources/docco.css">
</head>
<body>
<div id=container>
    <div id=background></div>
    <table cellspacing=0 cellpadding=0>
    <thead>
      <tr>
        <th class=docs><h1>$1</h1></th>
        <th class=code></th>
      </tr>
    </thead>
    <tbody>
        <tr><td class='docs'>$(cat)</td><td class='code'></td></tr>
    </tbody>
    </table>
</div>
</body>
</html>
HTML
}

# Recombining
# -----------

# Alright, we have separate files for each docs section and separate
# files for each code section. We've defined a function to wrap the
# results in the standard layout. All that's left to do now is put
# everything back together.

# Before starting the pipeline, decide the order in which to present the
# files. If `code0000` is empty, it should appear first so the remaining
# files are presented `docs0000`, `code0001`, `docs0001`, and so on. If
# `code0000` is not empty, `docs0000` should appear first so the files
# are presented `docs0000`, `code0000`, `docs0001`, `code0001` and so on.
#
# Ultimately, this means that if `code0000` is empty, the `-r` option
# should not be provided with the final `-k` option group to `sort`(1) in
# the pipeline below.
#
# `test -e` / `test -s` are POSIX, so no GNU-vs-BSD `stat(1)` probing is
# needed. A missing `code0000` is treated like a non-empty one.
if test -e code0000 && ! test -s code0000
then sortopt=""
else sortopt="r"
fi

# Start the pipeline with a simple list of split out temp filename. One file
# per line.
ls -1 docs[0-9]* code[0-9]* 2>/dev/null      |

# Now sort the list of files by the *number* first and then by the type. The
# list will look something like this when `sort(1)` is done with it:
#
#     docs0000
#     code0000
#     docs0001
#     code0001
#     docs0002
#     code0002
#     ...
#
sort -n -k"1.5" -k"1.1$sortopt"              |

# And if we pass those files to `cat(1)` in that order, it concatenates them
# in exactly the way we need. `xargs(1)` reads from `stdin` and passes each
# line of input as a separate argument to the program given.
#
# We could also have written this as:
#
#     cat $(ls -1 docs* code* | sort -n -k1.5 -k1.1r)
#
# I like to keep things to a simple flat pipeline when possible, hence the
# `xargs` approach.
xargs cat                                    |

# Run a quick substitution on the embedded dividers to turn them into table
# rows and cells. This also wraps each code block in a `<div class=highlight>`
# so that the CSS kicks in properly.
#
# The two `*DIVIDER` values are the same markers the `csplit` stages split
# on. NOTE(review): the two `*REPLACE` values were stripped in the reviewed
# copy and are reconstructed here; they re-add the `<div class=highlight><pre>`
# wrapper that the pygments post-filter removes.
{
    DOCSDIVIDER='<h5>DIVIDER</h5>'
    DOCSREPLACE='</pre></div></td></tr><tr><td class=docs>'
    CODEDIVIDER='<span class="c"># DIVIDER</span>'
    CODEREPLACE='</td><td class=code><div class=highlight><pre>'
    sed "
        s@${DOCSDIVIDER}@${DOCSREPLACE}@
        s@${CODEDIVIDER}@${CODEREPLACE}@
    "
}                                            |

# Pipe our recombined HTML into the layout and let it write the result to
# `stdout`.
layout "$title"

# More
# ----
#
# **shocco** is the third tool in a growing family of quick-and-dirty,
# literate-programming-style documentation generators:
#
#   * [Docco][do] - The original. Written in CoffeeScript and generates
#     documentation for CoffeeScript, JavaScript, and Ruby.
#   * [Rocco][ro] - A port of Docco to Ruby.
#
# If you like this sort of thing, you may also find interesting Knuth's
# massive body of work on literate programming:
#
#   * [Knuth: Literate Programming][kn]
#   * [Literate Programming on Wikipedia][wi]
#
# [ro]: http://rtomayko.github.com/rocco/
# [do]: http://jashkenas.github.com/docco/
# [kn]: http://www-cs-faculty.stanford.edu/~knuth/lp.html
# [wi]: http://en.wikipedia.org/wiki/Literate_programming

# Copyright (C) [Ryan Tomayko](http://tomayko.com/about)
# This is Free Software distributed under the MIT license. :