Factored Out Header Manipulation into extract_headers

Christopher Vollick [2011-12-08 21:50]
Factored Out Header Manipulation into extract_headers

I had a bug in the way I was extracting headers: when the body of a
file contained lines that looked like headers, those lines were
extracted too.

I was extracting headers all over the place, and it made me sad.

So, I wrote ./extract_headers, which only reads the lines before the
first blank line (where the headers end), and now everything just uses
that.

Happier.
Filename
default.converted.do
default.tagfeed.do
default.tagindex.do
default.tagtemplate.do
extract_headers
tagindex.do
diff --git a/default.converted.do b/default.converted.do
index ba42109..abee25b 100644
--- a/default.converted.do
+++ b/default.converted.do
@@ -5,7 +5,7 @@ redo-ifchange "$1.augmented"

 # Pull the type out of the file to figure out how to build it
 # Also, convert the / in the type into _ for filesystem niceness
-type="$(sed -n '/^Content-Type:/s/^Content-Type:\(.*\)$/\1/p' < "$1.augmented" | tr -d ' ' | tr '/' '_')"
+type="$(./extract_headers "Content-Type" < "$1.augmented" | tr '/' '_')"
 if [ -z "$type" ]; then
 	echo "Warning: No Content Type. Assuming text/plain" >&2
 	type="text_plain"
diff --git a/default.tagfeed.do b/default.tagfeed.do
index 0ae357a..dee95e3 100644
--- a/default.tagfeed.do
+++ b/default.tagfeed.do
@@ -31,9 +31,9 @@ echo "	<icon>$icon</icon>"
 redo-ifchange "tagindex"
 grep "^$tag " < "tagindex" | cut -d ' ' -f 2- | tac | while read file; do
 	redo-ifchange "$file.converted"
-	title="$(sed -n 's/Title:[ 	]*\(.*\)/\1/p' < "$file.converted")"
-	mod_date="$(sed -n 's/Date-Modified:[ 	]*\(.*\)/\1/p' < "$file.converted")"
-	create_date="$(sed -n 's/Date-Created:[ 	]*\(.*\)/\1/p' < "$file.converted")"
+	title="$(./extract_headers "Title" < "$file.converted")"
+	mod_date="$(./extract_headers "Date-Modified" < "$file.converted")"
+	create_date="$(./extract_headers "Date-Created" < "$file.converted")"
 	echo '	<entry>'
 	echo "		<link rel='alternate' type='text/html' href='$this_tag/$file.html' />"
 	echo "		<id>$this_tag/$file.html</id>"
diff --git a/default.tagindex.do b/default.tagindex.do
index 0b24560..c343f36 100644
--- a/default.tagindex.do
+++ b/default.tagindex.do
@@ -44,7 +44,7 @@ echo '<ol>'
 redo-ifchange "tagindex"
 grep "^$tag " < "tagindex" | cut -d ' ' -f 2- | tac | while read file; do
 	redo-ifchange "$file.converted"
-	title="$(sed -n 's/Title:[ 	]*\(.*\)/\1/p' < "$file.converted")"
+	title="$(./extract_headers "Title" < "$file.converted")"
 	echo '<li>'
 	echo "<h2> <a href='$file.html'>$title</a> </h2>"
 	# Now spit out the body
diff --git a/default.tagtemplate.do b/default.tagtemplate.do
index 3a80783..7ff95ce 100644
--- a/default.tagtemplate.do
+++ b/default.tagtemplate.do
@@ -4,13 +4,13 @@
 echo '#!/bin/sh

 contents="$(cat)"
-title="$(echo "$contents" | sed -n "/^Title:/s/^Title:[ 	]//p")"
+title="$(echo "$contents" | ./extract_headers "Title")"
 body="$(echo "$contents" | sed "1,/^$/d")"
-next="$(echo "$contents" | sed -n "/^Next-Link:/s/^Next-Link:[ 	]*//p")"
-prev="$(echo "$contents" | sed -n "/^Previous-Link:/s/^Previous-Link:[ 	]*//p")"
-created="$(echo "$contents" | sed -n "/^Date-Created:/s/^Date-Created:[ 	]*//p")"
-modified="$(echo "$contents" | sed -n "/^Date-Modified:/s/^Date-Modified:[ 	]*//p")"
-id="$(echo "$contents" | sed -n "/^ID:/s/^ID:[ 	]*//p")"
+next="$(echo "$contents" | ./extract_headers "Next-Link")"
+prev="$(echo "$contents" | ./extract_headers "Previous-Link")"
+created="$(echo "$contents" | ./extract_headers "Date-Created")"
+modified="$(echo "$contents" | ./extract_headers "Date-Modified")"
+id="$(echo "$contents" | ./extract_headers "ID")"
 ' > "$3"
 echo "tag='$1'" >> "$3"

diff --git a/extract_headers b/extract_headers
new file mode 100755
index 0000000..1196c27
--- /dev/null
+++ b/extract_headers
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+header="$1"
+
+headers="$(sed '/^$/,$d' | grep "^$header:")"
+
+values="$(echo "$headers" | cut -d ':' -f 2- | sed 's/^ *\(\( *[^ ][^ ]*\)*\) *$/\1/g')"
+
+echo "$values"
diff --git a/tagindex.do b/tagindex.do
index ee0cd0b..42a33fc 100644
--- a/tagindex.do
+++ b/tagindex.do
@@ -18,8 +18,8 @@ ls *.mime | sed 's/.mime$//' | while read file; do
 	# Need augmented to get the ordering
 	# If, though, a new tag was added, then augmented will be different, so we'll be rerun to regenerate the index
 	redo-ifchange "$file.augmented"
-	c_time="$(sed -n 's/Date-Created: *\(.*\)/\1/p' < "$file.augmented")"
-	for tag in $(grep '^Tag:' < "$file.augmented" | sed 's/^Tag://'); do
+	c_time="$(./extract_headers "Date-Created" < "$file.augmented")"
+	for tag in $(./extract_headers "Tag" < "$file.augmented"); do
 		echo "$c_time $tag $file" >> $3
 	done
 done
ViewGit