/*
 * SolidScribe/server/helpers/ProcessText.js
 * 215 lines · 5.7 KiB · JavaScript
 */

//Namespace object that collects the text helpers; the same object is this module's export
let ProcessText = {}
module.exports = ProcessText
/**
 * Reduces an HTML fragment to plain text.
 *
 * HTML entities and tags are each replaced by a single space, runs of
 * whitespace are collapsed to one space and the result is trimmed.
 *
 * @param {?string} string - HTML to clean; null, undefined and '' are accepted
 * @returns {string} the plain text, or '' when there is no input
 */
ProcessText.removeHtml = (string) => {
	if(string == null || string.length === 0){
		return ''
	}

	return string
		//Rip out all HTML entities: named (&nbsp;), decimal (&#39;) and hex (&#x27;).
		//The previous pattern required the literal text "A-Za-z0-9]" and so never matched a real entity.
		.replace(/&#?[A-Za-z0-9]+;/g, ' ')
		.replace(/<[^>]+>/g, ' ') //Rip out all HTML tags
		.replace(/\s+/g, ' ') //Collapse every run of whitespace into one space
		.trim()
}
/**
 * Removes every blank editor line from an HTML string.
 * A blank line is the exact markup <p><br></p>.
 *
 * @param {?string} string - HTML to clean; null, undefined and '' are accepted
 * @returns {string} the HTML without blank lines, or '' when there is no input
 */
ProcessText.stripBlankHtmlLines = (string) => {
	const hasContent = string != null && string.length != 0

	return hasContent ? string.replace(/\<p\>\<br\>\<\/p\>/g, '') : ''
}
/**
 * Removes every pair of back-to-back blank editor lines from an HTML string.
 * A blank line is the exact markup <p><br></p>; only two of them directly in a
 * row are matched (and both are dropped), so a lone blank line is left alone.
 *
 * @param {?string} string - HTML to clean; null, undefined and '' are accepted
 * @returns {string} the cleaned HTML, or '' when there is no input
 */
ProcessText.stripDoubleBlankLines = (string) => {
	//Nothing to clean for missing or empty input
	const isEmpty = string == null || string.length == 0
	if(isEmpty){
		return ''
	}

	const doubleBlankLine = /\<p\>\<br\>\<\/p\>\<p\>\<br\>\<\/p\>/g
	return string.replace(doubleBlankLine, '')
}
/**
 * Finds every URL-looking substring in a piece of text.
 *
 * Recognises http://, https://, ftp:// and file:// schemes as well as bare
 * "www." and "ftp." prefixes, case-insensitively.
 *
 * @param {?string} string - Text to scan
 * @returns {?string[]} the matches in order of appearance, or null when there
 *   are none (including when the input is null or undefined)
 */
ProcessText.getUrlsFromString = (string) => {
	//Missing input is treated like text without URLs: String.match also yields null for "no matches",
	//so callers see one consistent result instead of a TypeError
	if(string == null){
		return null
	}

	const urlPattern = /(?:(?:https?|ftp|file):\/\/|www\.|ftp\.)(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[-A-Z0-9+&@#/%=~_|$?!:,.])*(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[A-Z0-9+&@#/%=~_|$])/igm
	return string.match(urlPattern)
}
/**
 * Builds the title and preview text ("sub") displayed for a note.
 *
 * + The title is passed through as given; null or undefined becomes ''
 * + The preview is the note HTML with doubled blank lines
 *   (<p><br></p><p><br></p>) removed
 * + A note without body text yields an empty preview
 *
 * An older line-by-line implementation (title taken from the first line, URL
 * linking, list handling, character limit) used to follow the return
 * statement. It could never run, so it was removed; recover it from version
 * control if it is ever needed again.
 *
 * @param {?string} inTitle - Stored title of the note
 * @param {?string} inString - Note body as HTML
 * @returns {{title: string, sub: string}} title and preview HTML
 */
ProcessText.deduceNoteTitle = (inTitle, inString) => {

	//Always hand back a title, never null or undefined
	const title = inTitle == null ? '' : inTitle

	//No body text means there is nothing to preview
	if(!inString || inString.length === 0){
		return { title, sub: '' }
	}

	//Simplified preview: keep the note markup, only drop doubled blank lines
	const sub = ProcessText.stripDoubleBlankLines(inString)

	return { title, sub }
}