aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvgeny Kuznetsov <evgeny@kuznetsov.md>2020-02-05 18:48:03 +0300
committerEvgeny Kuznetsov <evgeny@kuznetsov.md>2020-02-05 18:48:03 +0300
commit2c054e0e4dbc94ecff95d62fd8a06b98405ff4d4 (patch)
treed2fcf09e0810c148219dc32171f1c5d193eb04dc
parent85caee2d9cad1f022e7101b1a04016f92fda64d1 (diff)
downloadradiorus-rss-2c054e0e4dbc94ecff95d62fd8a06b98405ff4d4.tar.gz
radiorus-rss-2c054e0e4dbc94ecff95d62fd8a06b98405ff4d4.zip
parse regexes safely
-rw-r--r--main.go38
-rw-r--r--main_test.go39
2 files changed, 68 insertions, 9 deletions
diff --git a/main.go b/main.go
index 9ffdafd..16463b1 100644
--- a/main.go
+++ b/main.go
@@ -113,12 +113,12 @@ func getFeed(url string) (feed *feeds.Feed) {
}
func populateFeed(feed *feeds.Feed, page []byte) (err error) {
- titleMatch := programNameRe.FindSubmatch(page)
- if len(titleMatch) < 1 {
- return fmt.Errorf("bad programme page")
+ title, err := parseSingle(page, programNameRe)
+ if err != nil {
+ return fmt.Errorf("bad programme page: title not found")
}
+ feed.Title = stripLink(string(title))
- feed.Title = stripLink(string(titleMatch[1]))
programImage := programImageRe.FindSubmatch(page)
feed.Image = &feeds.Image{
Link: feed.Link.Href,
@@ -133,8 +133,10 @@ func populateFeed(feed *feeds.Feed, page []byte) (err error) {
if len(episodeUrlRe.FindAllSubmatch(episode, -1)) > 1 {
return errBadEpisode
}
- episodeUrl := urlPrefix + string(episodeUrlRe.FindSubmatch(episode)[1])
- episodeTitle := string(episodeTitleRe.FindSubmatch(episode)[1])
+ url, _ := parseSingle(episode, episodeUrlRe)
+ episodeUrl := urlPrefix + string(url)
+ title, _ := parseSingle(episode, episodeTitleRe)
+ episodeTitle := string(title)
enclosure := findEnclosure(episode)
date := findDate(episode)
@@ -149,6 +151,24 @@ func populateFeed(feed *feeds.Feed, page []byte) (err error) {
return nil
}
+func parse(src []byte, re *regexp.Regexp, n int) (out [][]byte, err error) {
+ match := re.FindSubmatch(src)
+ if len(match) != n+1 {
+ for i := 0; i < n; i++ {
+ out = append(out, []byte{})
+ }
+ return out, errCantParse
+ }
+
+ return match[1:], nil
+}
+
+func parseSingle(src []byte, re *regexp.Regexp) (out []byte, err error) {
+ got, err := parse(src, re, 1)
+ out = got[0]
+ return
+}
+
func findDate(ep []byte) time.Time {
episodeDateRe := regexp.MustCompile(`brand\-time brand\-menu\-link">(.+?)?\.(.+?)?\.(.+?)? в (.+?)?:(.+?)?</a>`)
dateBytes := episodeDateRe.FindSubmatch(ep)
@@ -174,12 +194,12 @@ func parseDate(bytes [][]byte) time.Time {
func findEnclosure(ep []byte) *feeds.Enclosure {
re := regexp.MustCompile(`data\-type="audio"\s+data\-id="(.+?)?">`)
- matches := re.FindSubmatch(ep)
- if len(matches) < 2 {
+ res, err := parseSingle(ep, re)
+ if err != nil {
return &feeds.Enclosure{}
}
- url := "https://audio.vgtrk.com/download?id=" + string(matches[1])
+ url := "https://audio.vgtrk.com/download?id=" + string(res)
return &feeds.Enclosure{
Url: url,
diff --git a/main_test.go b/main_test.go
index 73dc0c8..2d520cb 100644
--- a/main_test.go
+++ b/main_test.go
@@ -26,6 +26,7 @@ import (
"os"
"os/exec"
"path/filepath"
+ "regexp"
"strings"
"sync"
"testing"
@@ -301,3 +302,41 @@ func TestParseDate(t *testing.T) {
}
}
}
+
+func TestParseErrors(t *testing.T) {
+ type testval struct {
+ src []byte
+ re *regexp.Regexp
+ n int
+ err error
+ }
+
+ var tests = []testval{
+ {
+ []byte("<h2>Аэростат</h2>"),
+ programNameRe,
+ 1,
+ nil,
+ }, {
+ []byte("<h2>Аэростат</h2><h2>foo</h2>"),
+ programNameRe,
+ 1,
+ nil,
+ }, {
+ []byte{},
+ programNameRe,
+ 1,
+ errCantParse,
+ },
+ }
+
+ for _, test := range tests {
+ res, got := parse(test.src, test.re, test.n)
+ if test.err != got {
+ t.Error("for", test.src, test.re, test.n, "\nwant:", test.err, "got:", got)
+ }
+ if test.n != len(res) {
+ t.Error("for", test.src, test.re, test.n, "\nwant length:", test.n, "got:", len(res))
+ }
+ }
+}