ソースを参照

a raw-to-the-extreme (but working) favicon fetcher

for embarrassment. and improvement, but that will have to wait a bit.
Lucas Stadler 11 年 前
コミット
24bb491fa2
共有1 個のファイルを変更した130 個の追加0 個の削除を含む
  1. 130 0
      go/favicon.go

+ 130 - 0
go/favicon.go

@ -0,0 +1,130 @@
1
package main
2
3
import (
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"os"
	"sync"

	"code.google.com/p/cascadia"
	"code.google.com/p/go.net/html"
)
12
13
// faviconCache maps a lookup key (the host of the requested URL, as computed
// by GetFaviconCached) to the favicon URL that was found for it. Entries are
// only ever added in this file, never removed.
var faviconCache = make(map[string]string)
14
15
func main() {
16
	http.HandleFunc("/favicon", HandleGetFavicon)
17
	err := http.ListenAndServe(":8080", nil)
18
19
	if err != nil {
20
		fmt.Println("error: ", err)
21
		os.Exit(1)
22
	}
23
}
24
25
func HandleGetFavicon (w http.ResponseWriter, r *http.Request) {
26
	url := r.URL.Query()["url"][0]
27
	favicon, err := GetFaviconCached(url)
28
	if err != nil {
29
		w.WriteHeader(http.StatusNotFound)
30
		w.Write([]byte(fmt.Sprint(err)))
31
		return
32
	}
33
	_, noRedirect := r.URL.Query()["no_redirect"]
34
	if noRedirect {
35
		w.Write([]byte(favicon))
36
		return
37
	}
38
	w.Header().Set("Location", favicon)
39
	w.WriteHeader(http.StatusSeeOther)
40
}
41
42
func GetFaviconCached(u string) (string, error) {
43
	parsed, err := url.Parse(u)
44
	var host = ""
45
	if err != nil {
46
		host = u
47
	} else {
48
		host = parsed.Host
49
	}
50
	faviconUrl, cached := faviconCache[host]
51
52
	if cached {
53
		return faviconUrl, nil
54
	}
55
56
	faviconUrl, err = GetFavicon(u)
57
	if err != nil {
58
		return faviconUrl, err
59
	}
60
61
	faviconCache[host] = faviconUrl
62
	return faviconUrl, nil
63
}
64
65
func GetFavicon(url string) (string, error) {
66
	if favicon, err := GetCanonicalFavicon(url); err == nil {
67
		fmt.Println("found favicon.ico")
68
		return favicon, nil
69
	}
70
71
	resp, err := http.Get(url)
72
	fmt.Println("get html", resp, err)
73
	if err != nil {
74
		return "", err
75
	}
76
	tree, err := html.Parse(resp.Body)
77
	fmt.Println("parse html", tree, err)
78
	if err != nil {
79
		return "", err
80
	}
81
82
	sel := cascadia.MustCompile("link[rel~=icon]")
83
	node := sel.MatchFirst(tree)
84
	if node == nil {
85
		return "", errors.New("no favicon found")
86
	}
87
88
	favicon, found := FindAttr("href", node.Attr)
89
	if !found {
90
		return "", errors.New("no link found")
91
	}
92
93
	return ToAbsolute(resp.Request.URL, favicon.Val), nil
94
}
95
96
// GetCanonicalFavicon checks whether the site hosting u serves an icon at
// the conventional <scheme>://<host>/favicon.ico location.
//
// On success it returns the URL the icon was finally served from, which may
// differ from the probed URL if the server redirected. It returns an error
// if u cannot be parsed, the request fails, the response status is 400 or
// above, or the response is reported as empty.
func GetCanonicalFavicon(u string) (string, error) {
	parsed, err := url.Parse(u)
	if err != nil {
		return "", err
	}
	faviconUrl := fmt.Sprintf("%s://%s/favicon.ico", parsed.Scheme, parsed.Host)

	resp, err := http.Get(faviconUrl)
	fmt.Println("get favicon.ico", resp, err)
	if err != nil {
		return "", err
	}
	// Only the status and headers are needed; release the connection.
	defer resp.Body.Close()

	// Some servers answer 200 with an empty body instead of 404.
	empty := resp.ContentLength == 0 || resp.Header.Get("Content-Length") == "0"
	if resp.StatusCode >= 400 || empty {
		return "", errors.New("no /favicon.ico")
	}

	finalURL := resp.Request.URL.String()
	fmt.Println("favicon.ico", finalURL, faviconUrl)
	return finalURL, nil
}
114
115
// FindAttr scans attributes in order for the first one whose Key equals
// name. It returns a pointer to a copy of that attribute and true, or nil
// and false when no attribute matches. Because the result is a copy,
// modifying it does not affect the input slice.
func FindAttr(name string, attributes []html.Attribute) (*html.Attribute, bool) {
	for i := range attributes {
		if attributes[i].Key != name {
			continue
		}
		match := attributes[i]
		return &match, true
	}
	return nil, false
}
123
124
// ToAbsolute resolves rawUrl as a reference relative to base and returns the
// resulting absolute URL as a string. If rawUrl cannot be parsed, it is
// returned unchanged.
func ToAbsolute(base *url.URL, rawUrl string) string {
	ref, parseErr := url.Parse(rawUrl)
	if parseErr == nil {
		return base.ResolveReference(ref).String()
	}
	return rawUrl
}