diff options
author | Evgeny Kuznetsov <evgeny@kuznetsov.md> | 2020-02-05 18:48:03 +0300 |
---|---|---|
committer | Evgeny Kuznetsov <evgeny@kuznetsov.md> | 2020-02-05 18:48:03 +0300 |
commit | 2c054e0e4dbc94ecff95d62fd8a06b98405ff4d4 (patch) | |
tree | d2fcf09e0810c148219dc32171f1c5d193eb04dc | |
parent | 85caee2d9cad1f022e7101b1a04016f92fda64d1 (diff) | |
download | radiorus-rss-2c054e0e4dbc94ecff95d62fd8a06b98405ff4d4.tar.gz radiorus-rss-2c054e0e4dbc94ecff95d62fd8a06b98405ff4d4.zip |
parse regexes safely
-rw-r--r-- | main.go | 38 | ||||
-rw-r--r-- | main_test.go | 39 |
2 files changed, 68 insertions, 9 deletions
@@ -113,12 +113,12 @@ func getFeed(url string) (feed *feeds.Feed) {
 }
 
 func populateFeed(feed *feeds.Feed, page []byte) (err error) {
-	titleMatch := programNameRe.FindSubmatch(page)
-	if len(titleMatch) < 1 {
-		return fmt.Errorf("bad programme page")
+	title, err := parseSingle(page, programNameRe)
+	if err != nil {
+		return fmt.Errorf("bad programme page: title not found")
 	}
+	feed.Title = stripLink(string(title))
 
-	feed.Title = stripLink(string(titleMatch[1]))
 	programImage := programImageRe.FindSubmatch(page)
 	feed.Image = &feeds.Image{
 		Link: feed.Link.Href,
@@ -133,8 +133,10 @@ func populateFeed(feed *feeds.Feed, page []byte) (err error) {
 		if len(episodeUrlRe.FindAllSubmatch(episode, -1)) > 1 {
 			return errBadEpisode
 		}
-		episodeUrl := urlPrefix + string(episodeUrlRe.FindSubmatch(episode)[1])
-		episodeTitle := string(episodeTitleRe.FindSubmatch(episode)[1])
+		url, _ := parseSingle(episode, episodeUrlRe)
+		episodeUrl := urlPrefix + string(url)
+		title, _ := parseSingle(episode, episodeTitleRe)
+		episodeTitle := string(title)
 		enclosure := findEnclosure(episode)
 		date := findDate(episode)
 
@@ -149,6 +151,24 @@ func populateFeed(feed *feeds.Feed, page []byte) (err error) {
 	return nil
 }
 
+func parse(src []byte, re *regexp.Regexp, n int) (out [][]byte, err error) {
+	match := re.FindSubmatch(src)
+	if len(match) != n+1 {
+		for i := 0; i < n; i++ {
+			out = append(out, []byte{})
+		}
+		return out, errCantParse
+	}
+
+	return match[1:], nil
+}
+
+func parseSingle(src []byte, re *regexp.Regexp) (out []byte, err error) {
+	got, err := parse(src, re, 1)
+	out = got[0]
+	return
+}
+
 func findDate(ep []byte) time.Time {
 	episodeDateRe := regexp.MustCompile(`brand\-time brand\-menu\-link">(.+?)?\.(.+?)?\.(.+?)? 
в (.+?)?:(.+?)?</a>`)
 	dateBytes := episodeDateRe.FindSubmatch(ep)
@@ -174,12 +194,12 @@ func parseDate(bytes [][]byte) time.Time {
 
 func findEnclosure(ep []byte) *feeds.Enclosure {
 	re := regexp.MustCompile(`data\-type="audio"\s+data\-id="(.+?)?">`)
-	matches := re.FindSubmatch(ep)
-	if len(matches) < 2 {
+	res, err := parseSingle(ep, re)
+	if err != nil {
 		return &feeds.Enclosure{}
 	}
 
-	url := "https://audio.vgtrk.com/download?id=" + string(matches[1])
+	url := "https://audio.vgtrk.com/download?id=" + string(res)
 
 	return &feeds.Enclosure{
 		Url: url,
diff --git a/main_test.go b/main_test.go
index 73dc0c8..2d520cb 100644
--- a/main_test.go
+++ b/main_test.go
@@ -26,6 +26,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"regexp"
 	"strings"
 	"sync"
 	"testing"
@@ -301,3 +302,41 @@ func TestParseDate(t *testing.T) {
 		}
 	}
 }
+
+func TestParseErrors(t *testing.T) {
+	type testval struct {
+		src []byte
+		re *regexp.Regexp
+		n int
+		err error
+	}
+
+	var tests = []testval{
+		{
+			[]byte("<h2>Аэростат</h2>"),
+			programNameRe,
+			1,
+			nil,
+		}, {
+			[]byte("<h2>Аэростат</h2><h2>foo</h2>"),
+			programNameRe,
+			1,
+			nil,
+		}, {
+			[]byte{},
+			programNameRe,
+			1,
+			errCantParse,
+		},
+	}
+
+	for _, test := range tests {
+		res, got := parse(test.src, test.re, test.n)
+		if test.err != got {
+			t.Error("for", test.src, test.re, test.n, "\nwant:", test.err, "got:", got)
+		}
+		if test.n != len(res) {
+			t.Error("for", test.src, test.re, test.n, "\nwant length:", test.n, "got:", len(res))
+		}
+	}
+}