// Copyright (C) 2019-2020 Evgeny Kuznetsov (evgeny@kuznetsov.md) // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . package main import ( "flag" "fmt" "io/ioutil" "log" "net/http" "regexp" "strconv" "strings" "sync" "time" "github.com/gorilla/feeds" ) type subst struct { from string to string } var ( substitutes = []subst{ // these need to be changed to show up properly in the feed {from: `"`, to: `"`}, {from: `–`, to: `–`}, } programNameRe = regexp.MustCompile(`

(.+?)?

`) programAboutRe = regexp.MustCompile(`(?s)
(.+?)?
`) programImageRe = regexp.MustCompile(`(?s)
(.+?)?`) episodeRe = regexp.MustCompile(`(?s)
(.+?)?
`) episodeAudioRe = regexp.MustCompile(`data\-id="(.+?)?">`) episodeDateRe = regexp.MustCompile(`brand\-time brand\-menu\-link">(.+?)?\.(.+?)?\.(.+?)? в (.+?)?:(.+?)?`) episodeDescRe = regexp.MustCompile(`

(.+?)?

`) episodeTitleRe = regexp.MustCompile(`title brand\-menu\-link">(.+?)?`) episodeUrlRe = regexp.MustCompile(` 1 { return fmt.Errorf("bad episode") } episodeUrl := "http://www.radiorus.ru/brand/" + string(episodeUrlRe.FindSubmatch(episode)[1]) episodeTitle := string(episodeTitleRe.FindSubmatch(episode)[1]) episodeAudioUrl := "https://audio.vgtrk.com/download?id=" + string(episodeAudioRe.FindSubmatch(episode)[1]) dateBytes := episodeDateRe.FindSubmatch(episode) var date [5]int for i, b := range dateBytes[1:] { d, err := strconv.Atoi(string(b)) if err != nil { log.Fatal(err) } date[i] = d } moscow := time.FixedZone("Moscow Time", int((3 * time.Hour).Seconds())) episodeDate := time.Date(date[2], time.Month(date[1]), date[0], date[3], date[4], 0, 0, moscow) feed.Add(&feeds.Item{ Id: episodeUrl, Link: &feeds.Link{Href: episodeUrl}, Title: episodeTitle, Enclosure: &feeds.Enclosure{ Url: episodeAudioUrl, Length: "1024", Type: "audio/mpeg", }, Created: episodeDate, }) } return nil } func describeFeed(feed *feeds.Feed, wg *sync.WaitGroup) { defer wg.Done() programAboutUrl := strings.TrimSuffix(feed.Link.Href, "episodes") + "about" page := getPage(programAboutUrl) feed.Description = processFeedDesc(page) } func processFeedDesc(page []byte) string { programAbout := programAboutRe.FindSubmatch(page)[1] re := regexp.MustCompile(`<(.+?)?>`) return string(re.ReplaceAll(programAbout, []byte(``))) } func describeEpisodes(feed *feeds.Feed) { var wg sync.WaitGroup for _, item := range feed.Items { wg.Add(1) go describeEpisode(item, &wg) } wg.Wait() } func describeEpisode(item *feeds.Item, wg *sync.WaitGroup) { defer wg.Done() page := getPage(item.Link.Href) item.Description = processEpisodeDesc(page) } func processEpisodeDesc(page []byte) string { return string(episodeDescRe.FindSubmatch(page)[1]) } func getPage(pageUrl string) []byte { res, err := http.Get(pageUrl) if err != nil { log.Fatal(err) } defer res.Body.Close() page, err := ioutil.ReadAll(res.Body) if err != nil { log.Fatal(err) } page = cleanText(page) return page } // cleanText replaces HTML-encoded symbols with proper UTF func cleanText(b []byte) []byte { for _, sub := range substitutes { re := regexp.MustCompile(sub.from) b = re.ReplaceAll(b, []byte(sub.to)) } return b }