// Copyright (C) 2019-2020 Evgeny Kuznetsov (evgeny@kuznetsov.md)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
(.+?)?
`) episodeTitleRe = regexp.MustCompile(`title brand\-menu\-link">(.+?)?`) episodeUrlRe = regexp.MustCompile(` 1 { return errBadEpisode } episodeUrl := urlPrefix + string(episodeUrlRe.FindSubmatch(episode)[1]) episodeTitle := string(episodeTitleRe.FindSubmatch(episode)[1]) episodeAudioUrl := "https://audio.vgtrk.com/download?id=" + string(episodeAudioRe.FindSubmatch(episode)[1]) dateBytes := episodeDateRe.FindSubmatch(episode) var date [5]int for i, b := range dateBytes[1:] { d, err := strconv.Atoi(string(b)) if err != nil { log.Fatal(err) } date[i] = d } moscow := time.FixedZone("Moscow Time", int((3 * time.Hour).Seconds())) episodeDate := time.Date(date[2], time.Month(date[1]), date[0], date[3], date[4], 0, 0, moscow) feed.Add(&feeds.Item{ Id: episodeUrl, Link: &feeds.Link{Href: episodeUrl}, Title: episodeTitle, Enclosure: &feeds.Enclosure{ Url: episodeAudioUrl, Length: "1024", Type: "audio/mpeg", }, Created: episodeDate, }) } return nil }

// NOTE(review): the single line above is a damaged remnant of the
// package-level regexp declarations and of the function that adds episodes to
// the feed. Its beginning is missing from this copy of the file, so it is
// carried over untouched; restore it from version control before building.

// feedDescTagRe matches an angle-bracketed span such as an HTML tag.
// processFeedDesc uses it to strip markup from the programme description.
// It is compiled once here rather than on every call.
var feedDescTagRe = regexp.MustCompile(`<(.+?)?>`)

// describeFeed downloads the programme's "about" page and stores the text
// extracted from it in feed.Description.
//
// The page address is built from feed.Link.Href by trimming a trailing
// "episodes" and appending "about". wg.Done is called when the function
// returns, so the caller is expected to have called wg.Add beforehand.
func describeFeed(feed *feeds.Feed, wg *sync.WaitGroup) {
	defer wg.Done()

	programAboutURL := strings.TrimSuffix(feed.Link.Href, "episodes") + "about"
	page := getPage(programAboutURL)
	feed.Description = processFeedDesc(page)
}

// processFeedDesc extracts the programme description from an "about" page
// and removes every angle-bracketed span from it.
//
// It returns the empty string when programAboutRe does not match the page
// (for instance when the server answered with an error page), instead of
// panicking on a missing submatch.
func processFeedDesc(page []byte) string {
	matches := programAboutRe.FindSubmatch(page)
	if len(matches) < 2 {
		return ""
	}
	return string(feedDescTagRe.ReplaceAll(matches[1], []byte(``)))
}

// describeEpisodes fills in the description of every item in the feed.
// One goroutine is started per item; the function returns once all of them
// have finished.
func describeEpisodes(feed *feeds.Feed) {
	var wg sync.WaitGroup
	for _, item := range feed.Items {
		wg.Add(1)
		go describeEpisode(item, &wg)
	}
	wg.Wait()
}

// describeEpisode downloads the page at item.Link.Href and stores the
// description found there in item.Description.
//
// When no description can be found the problem is logged and the
// description is set to the empty string. wg.Done is called on return.
func describeEpisode(item *feeds.Item, wg *sync.WaitGroup) {
	defer wg.Done()

	page := getPage(item.Link.Href)
	desc, err := processEpisodeDesc(page)
	if err != nil {
		log.Printf("could not find episode description on page %v: %v", item.Link.Href, err)
	}
	item.Description = desc
}

// processEpisodeDesc returns the first submatch of episodeDescRe in page.
// It returns errCantParse when the expression does not match.
func processEpisodeDesc(page []byte) (string, error) {
	matches := episodeDescRe.FindSubmatch(page)
	if len(matches) < 2 {
		return "", errCantParse
	}
	return string(matches[1]), nil
}

// getPage fetches pageUrl over HTTP and returns the response body after
// passing it through cleanText.
//
// A failure to perform the request or to read the body terminates the
// program via log.Fatal.
//
// NOTE(review): the response status code is not inspected, so the body of an
// error response is returned like any other page - confirm this is intended.
func getPage(pageUrl string) []byte {
	res, err := http.Get(pageUrl)
	if err != nil {
		log.Fatal(err)
	}
	defer res.Body.Close()

	page, err := ioutil.ReadAll(res.Body)
	if err != nil {
		log.Fatal(err)
	}
	return cleanText(page)
}

// cleanText replaces HTML-encoded symbols with proper UTF.
//
// Every entry of substitutes is applied in order: its from field is compiled
// as a regular expression and each match in b is replaced with its to field.
func cleanText(b []byte) []byte {
	for _, sub := range substitutes {
		re := regexp.MustCompile(sub.from)
		b = re.ReplaceAll(b, []byte(sub.to))
	}
	return b
}

// episodeURLPrefix derives common episode URL prefix from programme page URL.
//
// The result is everything before the first "/brand/" in url, followed by
// "/brand/". When url does not contain "/brand/", the whole url is used as
// the leading part.
func episodeURLPrefix(url string) string {
	return strings.Split(url, "/brand/")[0] + "/brand/"
}