about summary refs log tree commit diff
diff options
context:
space:
mode:
author Evgeny Kuznetsov <evgeny@kuznetsov.md> 2020-01-27 20:15:39 +0300
committer Evgeny Kuznetsov <evgeny@kuznetsov.md> 2020-01-27 20:54:36 +0300
commit e7afcc9f77852b40060f7a742c83886c127ffedf (patch)
tree 69cf375c49c6497b17c4c14e5e6c3a66d5afa3ff
parent 3f683db50e2083550a38d9414d95b7f766a00531 (diff)
download radiorus-rss-e7afcc9f77852b40060f7a742c83886c127ffedf.tar.gz
radiorus-rss-e7afcc9f77852b40060f7a742c83886c127ffedf.zip
isolate HTML cleaning
-rw-r--r-- main.go 17
1 files changed, 13 insertions, 4 deletions
diff --git a/main.go b/main.go
index 23f6eac..0c8baf4 100644
--- a/main.go
+++ b/main.go
@@ -17,13 +17,14 @@ package main
import (
"flag"
- "github.com/gorilla/feeds"
"io/ioutil"
"log"
"net/http"
"regexp"
"strconv"
"time"
+
+ "github.com/gorilla/feeds"
)
type subst struct {
@@ -32,7 +33,7 @@ type subst struct {
}
var (
- substitutes = []subst{
+ substitutes = []subst{ // these need to be changed to show up properly in the feed
{from: `&quot;`, to: `"`},
{from: `&ndash;`, to: `–`},
}
@@ -147,9 +148,17 @@ func getPage(pageUrl string) []byte {
if err != nil {
log.Fatal(err)
}
+
+ page = cleanText(page)
+
+ return page
+}
+
+// cleanText replaces HTML-encoded symbols with proper UTF
+func cleanText(b []byte) []byte {
for _, sub := range substitutes {
re := regexp.MustCompile(sub.from)
- page = re.ReplaceAll(page, []byte(sub.to))
+ b = re.ReplaceAll(b, []byte(sub.to))
}
- return page
+ return b
}