mirror of https://github.com/writeas/writefreely
parent
bab5e42299
commit
39477cfcab
@ -0,0 +1,66 @@ |
|||||||
|
package parse |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/writeas/web-core/stringmanip" |
||||||
|
"regexp" |
||||||
|
"strings" |
||||||
|
) |
||||||
|
|
||||||
|
var (
	// titleElementReg matches opening and closing paragraph tags ("<p>" and
	// "</p>") so they can be removed from lede text.
	titleElementReg = regexp.MustCompile("</?p>")

	// urlReg matches the "http://" or "https://" protocol prefix of a URL.
	urlReg = regexp.MustCompile("https?://")

	// imgReg matches a Markdown image, ![alt](target), capturing the alt text
	// in group 1. Both the alt text and the target must be non-empty, so an
	// image written with an empty alt text is not matched.
	imgReg = regexp.MustCompile(`!\[([^]]+)\]\([^)]+\)`)
)
||||||
|
|
||||||
|
// PostLede attempts to extract the first thought of the given post, generally
|
||||||
|
// contained within the first line or sentence of text.
|
||||||
|
func PostLede(t string, includePunc bool) string { |
||||||
|
// Adjust where we truncate if we want to include punctuation
|
||||||
|
iAdj := 0 |
||||||
|
if includePunc { |
||||||
|
iAdj = 1 |
||||||
|
} |
||||||
|
|
||||||
|
// Find lede within first line of text
|
||||||
|
nl := strings.IndexRune(t, '\n') |
||||||
|
if nl > -1 { |
||||||
|
t = t[:nl] |
||||||
|
} |
||||||
|
|
||||||
|
// Strip certain HTML tags
|
||||||
|
t = titleElementReg.ReplaceAllString(t, "") |
||||||
|
|
||||||
|
// Strip URL protocols
|
||||||
|
t = urlReg.ReplaceAllString(t, "") |
||||||
|
|
||||||
|
// Strip image URL, leaving only alt text
|
||||||
|
t = imgReg.ReplaceAllString(t, " $1 ") |
||||||
|
|
||||||
|
// Find lede within first sentence
|
||||||
|
punc := strings.Index(t, ". ") |
||||||
|
if punc > -1 { |
||||||
|
t = t[:punc+iAdj] |
||||||
|
} |
||||||
|
punc = stringmanip.IndexRune(t, '。') |
||||||
|
if punc > -1 { |
||||||
|
c := []rune(t) |
||||||
|
t = string(c[:punc+iAdj]) |
||||||
|
} |
||||||
|
|
||||||
|
return t |
||||||
|
} |
||||||
|
|
||||||
|
// TruncToWord truncates the given text to the provided limit.
//
// The limit l is counted in runes, not bytes, so multi-byte characters are
// never split. When s is longer than l runes, it is cut to l runes and then
// shortened further to the last ASCII space within that prefix, so the result
// does not end in a partial word; if the prefix contains no space, the hard
// cut at l runes is returned. A negative l is treated as zero.
//
// The boolean result reports whether s was longer than the limit and
// therefore truncated.
func TruncToWord(s string, l int) (string, bool) {
	// A negative limit is not a valid slice bound; clamp it instead of
	// panicking.
	if l < 0 {
		l = 0
	}

	c := []rune(s)
	if len(c) <= l {
		return s, false
	}

	s = string(c[:l])
	// Back up to the last space so the final word is not cut in half. Note
	// that this also drops the last word when the limit happens to fall
	// exactly on a word boundary.
	if spaceIdx := strings.LastIndexByte(s, ' '); spaceIdx > -1 {
		s = s[:spaceIdx]
	}
	return s, true
}
@ -0,0 +1,45 @@ |
|||||||
|
package parse |
||||||
|
|
||||||
|
import "testing" |
||||||
|
|
||||||
|
func TestPostLede(t *testing.T) { |
||||||
|
text := map[string]string{ |
||||||
|
"早安。跨出舒適圈,才能前往": "早安。", |
||||||
|
"早安。This is my post. It is great.": "早安。", |
||||||
|
"Hello. 早安。": "Hello.", |
||||||
|
"Sup? Everyone says punctuation is punctuation.": "Sup?", |
||||||
|
"Humans are humans, and society is full of good and bad actors. Technology, at the most fundamental level, is a neutral tool that can be used by either to meet any ends. ": "Humans are humans, and society is full of good and bad actors.", |
||||||
|
`Online Domino Is Must For Everyone |
||||||
|
|
||||||
|
Do you want to understand how to play poker online?`: "Online Domino Is Must For Everyone", |
||||||
|
`おはようございます |
||||||
|
|
||||||
|
私は日本から帰ったばかりです。`: "おはようございます", |
||||||
|
"Hello, we say, おはよう. We say \"good morning\"": "Hello, we say, おはよう.", |
||||||
|
} |
||||||
|
|
||||||
|
c := 1 |
||||||
|
for i, o := range text { |
||||||
|
if s := PostLede(i, true); s != o { |
||||||
|
t.Errorf("#%d: Got '%s' from '%s'; expected '%s'", c, s, i, o) |
||||||
|
} |
||||||
|
c++ |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func TestTruncToWord(t *testing.T) { |
||||||
|
text := map[string]string{ |
||||||
|
"Можливо, ми можемо використовувати інтернет-інструменти, щоб виготовити якийсь текст, який би міг бути і на, і в кінцевому підсумку, буде скорочено, тому що це тривало так довго.": "Можливо, ми можемо використовувати інтернет-інструменти, щоб виготовити якийсь", |
||||||
|
"早安。This is my post. It is great. It is a long post that is great that is a post that is great.": "早安。This is my post. It is great. It is a long post that is great that is a post", |
||||||
|
"Sup? Everyone says punctuation is punctuation.": "Sup? Everyone says punctuation is punctuation.", |
||||||
|
"I arrived in Japan six days ago. Tired from a 10-hour flight after a night-long layover in Calgary, I wandered wide-eyed around Narita airport looking for an ATM.": "I arrived in Japan six days ago. Tired from a 10-hour flight after a night-long", |
||||||
|
} |
||||||
|
|
||||||
|
c := 1 |
||||||
|
for i, o := range text { |
||||||
|
if s, _ := TruncToWord(i, 80); s != o { |
||||||
|
t.Errorf("#%d: Got '%s' from '%s'; expected '%s'", c, s, i, o) |
||||||
|
} |
||||||
|
c++ |
||||||
|
} |
||||||
|
} |
Loading…
Reference in new issue