html2article: Ignore empty anchor elements and obtain the original URL from Google Docs redirections
LGTM=adg R=adg, campoy CC=golang-codereviews https://golang.org/cl/127560043
This commit is contained in:
parent
f579fb3656
commit
e8a1924bfb
|
@ -14,6 +14,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -210,7 +211,23 @@ func text(n *html.Node) string {
|
||||||
indent(&buf, childText(n))
|
indent(&buf, childText(n))
|
||||||
buf.WriteByte('\n')
|
buf.WriteByte('\n')
|
||||||
case atom.A:
|
case atom.A:
|
||||||
fmt.Fprintf(&buf, "[[%s][%s]]", attr(n, "href"), childText(n))
|
href, text := attr(n, "href"), childText(n)
|
||||||
|
// Skip links with no text.
|
||||||
|
if strings.TrimSpace(text) == "" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Don't emit empty links.
|
||||||
|
if strings.TrimSpace(href) == "" {
|
||||||
|
buf.WriteString(text)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Use original url for Google Docs redirections.
|
||||||
|
if u, err := url.Parse(href); err != nil {
|
||||||
|
log.Printf("parsing url %q: %v", href, err)
|
||||||
|
} else if u.Host == "www.google.com" && u.Path == "/url" {
|
||||||
|
href = u.Query().Get("q")
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&buf, "[[%s][%s]]", href, text)
|
||||||
case atom.Code:
|
case atom.Code:
|
||||||
buf.WriteString(highlight(n, "`"))
|
buf.WriteString(highlight(n, "`"))
|
||||||
case atom.B:
|
case atom.B:
|
||||||
|
@ -233,6 +250,7 @@ func text(n *html.Node) string {
|
||||||
}
|
}
|
||||||
fmt.Fprintf(&buf, "\n.iframe %s 540 304\n", u)
|
fmt.Fprintf(&buf, "\n.iframe %s 540 304\n", u)
|
||||||
}
|
}
|
||||||
|
case atom.Title:
|
||||||
default:
|
default:
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue