diff options
author | Evgeny Kuznetsov <evgeny@kuznetsov.md> | 2020-02-05 19:51:43 +0300 |
---|---|---|
committer | Evgeny Kuznetsov <evgeny@kuznetsov.md> | 2020-02-05 19:51:43 +0300 |
commit | 789eb9f24ac72505ef8bd89d6c94a99ba9d7efad (patch) | |
tree | a37ce8aa3015b634a3eccb5d9b7960c22695ec3c | |
parent | 37a1c96b0be5edc140fea87812854f63c8854542 (diff) | |
download | radiorus-rss-789eb9f24ac72505ef8bd89d6c94a99ba9d7efad.tar.gz radiorus-rss-789eb9f24ac72505ef8bd89d6c94a99ba9d7efad.zip |
parse episodes more safely
-rw-r--r-- | main.go | 7 | ||||
-rw-r--r-- | main_test.go | 16 | ||||
-rw-r--r-- | testdata/episodes.badep.0 | 23 | ||||
-rw-r--r-- | testdata/episodes.badep.1 | 34 |
4 files changed, 78 insertions, 2 deletions
@@ -133,7 +133,10 @@ func populateFeed(feed *feeds.Feed, page []byte) (err error) { if len(episodeUrlRe.FindAllSubmatch(episode, -1)) > 1 { return errBadEpisode } - url, _ := parseSingle(episode, episodeUrlRe) + url, err := parseSingle(episode, episodeUrlRe) + if err != nil { + return errBadEpisode + } episodeUrl := urlPrefix + string(url) title, _ := parseSingle(episode, episodeTitleRe) episodeTitle := string(title) @@ -148,7 +151,7 @@ func populateFeed(feed *feeds.Feed, page []byte) (err error) { Created: date, }) } - return nil + return } func parse(src []byte, re *regexp.Regexp, n int) (out [][]byte, err error) { diff --git a/main_test.go b/main_test.go index 2d520cb..d00a6ae 100644 --- a/main_test.go +++ b/main_test.go @@ -27,6 +27,7 @@ import ( "os/exec" "path/filepath" "regexp" + "strconv" "strings" "sync" "testing" @@ -91,6 +92,21 @@ func TestFeed(t *testing.T) { assertGolden(t, actual, golden) } +func TestBadEpisode(t *testing.T) { + feed := &feeds.Feed{ + Link: &feeds.Link{Href: "http://www.radiorus.ru/brand/57083/episodes"}, + } + + for i := 0; i <= 1; i++ { + page := helperLoadBytes(t, "episodes.badep."+strconv.Itoa(i)) + page = cleanText(page) + + if err := populateFeed(feed, page); err != errBadEpisode { + t.Error("for sample", i, "want:", errBadEpisode, "got:", err) + } + } +} + func TestFindEpisodes(t *testing.T) { var tests = []string{ "episodes", diff --git a/testdata/episodes.badep.0 b/testdata/episodes.badep.0 new file mode 100644 index 0000000..13d82f2 --- /dev/null +++ b/testdata/episodes.badep.0 @@ -0,0 +1,23 @@ + <h2><a href="/brand/57083">"Аэростат"</a></h2> + <div class="brand-promo__header"> + <img src="https://cdn-st4.rtr-vesti.ru/vh/pictures/xw/124/617/1.jpg" alt='"Аэростат"'> + <div class="brand__list--wrap--item"> + <div class="photo-wrap"> + <a href="/brand/57083/episode/2237781" class="photo-wrap__link"> + <img src="https://cdn-st1.rtr-vesti.ru/vh/pictures/bw/183/795/6.jpg" + alt="The Cure | momento mori, Malaysia"> + <!-- если фото > 1 [ --> + <!-- ] если фото > 1 --> + </a> + </div> + <a href="/brand/57083/episode/2237781" + class="brand-time brand-menu-link">19.01.2020 в 14:10</a> + <a href="/brand/57083/episode/2237781" class="more-info brand-menu-link">Подробнее</a> + + <!-- если есть аудио[ --> + <div class="audio-count" data-type="audio" + data-id="2466052"></div> + <div class="add-to-list"></div> + <!-- ] если есть аудио --> + + </div> diff --git a/testdata/episodes.badep.1 b/testdata/episodes.badep.1 new file mode 100644 index 0000000..b29fcc8 --- /dev/null +++ b/testdata/episodes.badep.1 @@ -0,0 +1,34 @@ + <h2><a href="/brand/57083">"Аэростат"</a></h2> + <div class="brand-promo__header"> + <img src="https://cdn-st4.rtr-vesti.ru/vh/pictures/xw/124/617/1.jpg" alt='"Аэростат"'> + <div class="brand__list--wrap--item"> + <div class="photo-wrap"> + <a href="/brand/57083/episode/2237849" class="photo-wrap__link"> + <img src="https://cdn-st2.rtr-vesti.ru/vh/pictures/bw/207/010/1.jpg" + alt="Ансамбль "Pied Pipers" | public domain"> + <!-- если фото > 1 [ --> + <!-- ] если фото > 1 --> + </a> + </div> + <a href="/brand/57083/episode/2237849" + class="brand-time brand-menu-link">26.01.2020 в 14:10</a> + <a href="/brand/57083/episode/2237849" class="title brand-menu-link">Новые имена 27</a> + <a href="/brand/57083/episode/2237781" class="photo-wrap__link"> + <img src="https://cdn-st1.rtr-vesti.ru/vh/pictures/bw/183/795/6.jpg" + alt="The Cure | momento mori, Malaysia"> + <!-- если фото > 1 [ --> + <!-- ] если фото > 1 --> + </a> + </div> + <a href="/brand/57083/episode/2237781" + class="brand-time brand-menu-link">19.01.2020 в 14:10</a> + <a href="/brand/57083/episode/2237781" class="title brand-menu-link">The Cure</a> + <a href="/brand/57083/episode/2237781" class="more-info brand-menu-link">Подробнее</a> + + <!-- если есть аудио[ --> + <div class="audio-count" data-type="audio" + data-id="2466052"></div> + <div class="add-to-list"></div> + <!-- ] если есть аудио --> + + </div> |