diff options
author | Evgeny Kuznetsov <evgeny@kuznetsov.md> | 2020-01-27 20:15:39 +0300 |
---|---|---|
committer | Evgeny Kuznetsov <evgeny@kuznetsov.md> | 2020-01-27 20:54:36 +0300 |
commit | e7afcc9f77852b40060f7a742c83886c127ffedf (patch) | |
tree | 69cf375c49c6497b17c4c14e5e6c3a66d5afa3ff /main.go | |
parent | 3f683db50e2083550a38d9414d95b7f766a00531 (diff) | |
download | radiorus-rss-e7afcc9f77852b40060f7a742c83886c127ffedf.tar.gz radiorus-rss-e7afcc9f77852b40060f7a742c83886c127ffedf.zip |
isolate HTML cleaning
Diffstat (limited to 'main.go')
-rw-r--r-- | main.go | 17 |
1 files changed, 13 insertions, 4 deletions
@@ -17,13 +17,14 @@ package main import ( "flag" - "github.com/gorilla/feeds" "io/ioutil" "log" "net/http" "regexp" "strconv" "time" + + "github.com/gorilla/feeds" ) type subst struct { @@ -32,7 +33,7 @@ type subst struct { } var ( - substitutes = []subst{ + substitutes = []subst{ // these need to be changed to show up properly in the feed {from: `&quot;`, to: `"`}, {from: `&ndash;`, to: `–`}, } @@ -147,9 +148,17 @@ func getPage(pageUrl string) []byte { if err != nil { log.Fatal(err) } + + page = cleanText(page) + + return page +} + +// cleanText replaces HTML-encoded symbols with proper UTF +func cleanText(b []byte) []byte { for _, sub := range substitutes { re := regexp.MustCompile(sub.from) - page = re.ReplaceAll(page, []byte(sub.to)) + b = re.ReplaceAll(b, []byte(sub.to)) } - return page + return b } |