html2article: Ignore empty anchor elements and obtain the original URL from Google Docs redirect links
LGTM=adg R=adg, campoy CC=golang-codereviews https://golang.org/cl/127560043
This commit is contained in:
parent
f579fb3656
commit
e8a1924bfb
|
@ -14,6 +14,7 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
@ -210,7 +211,23 @@ func text(n *html.Node) string {
|
|||
indent(&buf, childText(n))
|
||||
buf.WriteByte('\n')
|
||||
case atom.A:
|
||||
fmt.Fprintf(&buf, "[[%s][%s]]", attr(n, "href"), childText(n))
|
||||
href, text := attr(n, "href"), childText(n)
|
||||
// Skip links with no text.
|
||||
if strings.TrimSpace(text) == "" {
|
||||
break
|
||||
}
|
||||
// Links with an empty href are emitted as plain text, not as links.
|
||||
if strings.TrimSpace(href) == "" {
|
||||
buf.WriteString(text)
|
||||
break
|
||||
}
|
||||
// Use original url for Google Docs redirections.
|
||||
if u, err := url.Parse(href); err != nil {
|
||||
log.Println("parsing url %q: %v", href, err)
|
||||
} else if u.Host == "www.google.com" && u.Path == "/url" {
|
||||
href = u.Query().Get("q")
|
||||
}
|
||||
fmt.Fprintf(&buf, "[[%s][%s]]", href, text)
|
||||
case atom.Code:
|
||||
buf.WriteString(highlight(n, "`"))
|
||||
case atom.B:
|
||||
|
@ -233,6 +250,7 @@ func text(n *html.Node) string {
|
|||
}
|
||||
fmt.Fprintf(&buf, "\n.iframe %s 540 304\n", u)
|
||||
}
|
||||
case atom.Title:
|
||||
default:
|
||||
return true
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue