diff --git a/parse/posts.go b/parse/posts.go
new file mode 100644
index 0000000..23d6f7f
--- /dev/null
+++ b/parse/posts.go
@@ -0,0 +1,81 @@
+package parse
+
+import (
+	"github.com/writeas/web-core/stringmanip"
+	"regexp"
+	"strings"
+)
+
+// Patterns that PostLede strips or rewrites before looking for a sentence end.
+var (
+	titleElementReg = regexp.MustCompile("</?h[1-6]>")
+	urlReg          = regexp.MustCompile("https?://")
+	imgReg          = regexp.MustCompile(`!\[([^]]+)\]\([^)]+\)`)
+)
+
+// PostLede attempts to extract the first thought of the given post, generally
+// contained within the first line or sentence of text.
+//
+// Only the text before the first newline is considered. HTML heading tags and
+// URL protocols are removed from it, and Markdown images are replaced by their
+// alt text. The result is then cut at the first ". ", "? " or '。', whichever
+// comes first. When includePunc is true that terminating punctuation mark is
+// kept; otherwise it is dropped.
+func PostLede(t string, includePunc bool) string {
+	// Adjust where we truncate if we want to include punctuation
+	iAdj := 0
+	if includePunc {
+		iAdj = 1
+	}
+
+	// Find lede within first line of text
+	if nl := strings.IndexRune(t, '\n'); nl > -1 {
+		t = t[:nl]
+	}
+
+	// Strip heading tags
+	t = titleElementReg.ReplaceAllString(t, "")
+
+	// Strip URL protocols
+	t = urlReg.ReplaceAllString(t, "")
+
+	// Strip image URL, leaving only alt text
+	t = imgReg.ReplaceAllString(t, " $1 ")
+
+	// Find lede within first sentence. Each cut only ever shortens t, so after
+	// all of them t ends at the earliest terminator. The ASCII marks are one
+	// byte wide, which is why adding iAdj to a byte index is safe for them.
+	for _, end := range []string{". ", "? "} {
+		if punc := strings.Index(t, end); punc > -1 {
+			t = t[:punc+iAdj]
+		}
+	}
+	// '。' is multi-byte, so cut on rune positions rather than byte offsets.
+	if punc := stringmanip.IndexRune(t, '。'); punc > -1 {
+		c := []rune(t)
+		t = string(c[:punc+iAdj])
+	}
+
+	return t
+}
+
+// TruncToWord truncates the given text to at most l runes, then backs up to
+// the last space so the result does not end in the middle of a word. If the
+// truncated text contains no space, the hard cut at l runes is kept. A
+// negative l is treated as zero. The returned bool reports whether s was
+// truncated.
+func TruncToWord(s string, l int) (string, bool) {
+	if l < 0 {
+		l = 0
+	}
+	c := []rune(s)
+	if len(c) <= l {
+		return s, false
+	}
+
+	s = string(c[:l])
+	if spaceIdx := strings.LastIndexByte(s, ' '); spaceIdx > -1 {
+		s = s[:spaceIdx]
+	}
+	return s, true
+}
diff --git a/parse/posts_test.go b/parse/posts_test.go
new file mode 100644
index 0000000..70bf052
--- /dev/null
+++ b/parse/posts_test.go
@@ -0,0 +1,45 @@
+package parse
+
+import "testing"
+
+func TestPostLede(t *testing.T) {
+	text := map[string]string{
+		"早安。跨出舒適圈,才能前往": "早安。",
+		"早安。This is my post. It is great.": "早安。",
+		"Hello. 早安。": "Hello.",
+		"Sup? Everyone says punctuation is punctuation.": "Sup?",
+		"Humans are humans, and society is full of good and bad actors. Technology, at the most fundamental level, is a neutral tool that can be used by either to meet any ends. ": "Humans are humans, and society is full of good and bad actors.",
+		`Online Domino Is Must For Everyone
+
+		Do you want to understand how to play poker online?`: "Online Domino Is Must For Everyone",
+		`おはようございます
+
+		私は日本から帰ったばかりです。`: "おはようございます",
+		"Hello, we say, おはよう. We say \"good morning\"": "Hello, we say, おはよう.",
+	}
+
+	c := 1
+	for i, o := range text {
+		if s := PostLede(i, true); s != o {
+			t.Errorf("#%d: Got '%s' from '%s'; expected '%s'", c, s, i, o)
+		}
+		c++
+	}
+}
+
+func TestTruncToWord(t *testing.T) {
+	text := map[string]string{
+		"Можливо, ми можемо використовувати інтернет-інструменти, щоб виготовити якийсь текст, який би міг бути і на, і в кінцевому підсумку, буде скорочено, тому що це тривало так довго.": "Можливо, ми можемо використовувати інтернет-інструменти, щоб виготовити якийсь",
+		"早安。This is my post. It is great. It is a long post that is great that is a post that is great.": "早安。This is my post. It is great. It is a long post that is great that is a post",
+		"Sup? Everyone says punctuation is punctuation.": "Sup? Everyone says punctuation is punctuation.",
+		"I arrived in Japan six days ago. Tired from a 10-hour flight after a night-long layover in Calgary, I wandered wide-eyed around Narita airport looking for an ATM.": "I arrived in Japan six days ago. Tired from a 10-hour flight after a night-long",
+	}
+
+	c := 1
+	for i, o := range text {
+		if s, _ := TruncToWord(i, 80); s != o {
+			t.Errorf("#%d: Got '%s' from '%s'; expected '%s'", c, s, i, o)
+		}
+		c++
+	}
+}
diff --git a/posts.go b/posts.go
index 7f12da3..32c18dc 100644
--- a/posts.go
+++ b/posts.go
@@ -7,8 +7,8 @@ import (
 	"github.com/writeas/monday"
 	"github.com/writeas/slug"
 	"github.com/writeas/web-core/converter"
-	"github.com/writeas/web-core/parse"
 	"github.com/writeas/web-core/tags"
+	"github.com/writeas/writefreely/parse"
 	"html/template"
 	"regexp"
 	"time"