Gigantic Update
* Migrated manual tests to Jest and started working on better coverage.
* Added a bookmarklet and a push-key generation tool, allowing URLs to be pushed from bookmarklets.
* Updated web scraping with many bug fixes.
* Updated the attachments page to handle the new push links.
* Added aggressive note-change checking: if patches get out of sync, the server overwrites the bad updates.
This commit is contained in:
@@ -72,6 +72,8 @@ CryptoString.createSalt = () => {
|
||||
|
||||
return crypto.randomBytes(SALT_BYTE_SIZE).toString('base64')
|
||||
}
|
||||
|
||||
// Creates a small random salt
|
||||
CryptoString.createSmallSalt = () => {
|
||||
|
||||
return crypto.randomBytes(20).toString('base64')
|
||||
|
@@ -6,7 +6,7 @@ let SiteScrape = module.exports = {}
|
||||
|
||||
const removeWhitespace = /\s+/g
|
||||
|
||||
const commonWords = ['share','facebook','twitter','reddit','be','have','do','say','get','make','go','know','take','see','come','think','look','want',
|
||||
const commonWords = ['just','start','what','these','how', 'was', 'being','can','way','share','facebook','twitter','reddit','be','have','do','say','get','make','go','know','take','see','come','think','look','want',
|
||||
'give','use','find','tell','ask','work','seem','feel','try','leave','call','good','new','first','last','long','great','little','own','other','old',
|
||||
'right','big','high','different','small','large','next','early','young','important','few','public','bad','same','able','to','of','in','for','on',
|
||||
'with','at','by','from','up','about','into','over','after','the','and','a','that','I','it','not','he','as','you','this','but','his','they','her',
|
||||
@@ -162,19 +162,28 @@ SiteScrape.getKeywords = ($) => {
|
||||
|
||||
majorContent += $('[class*=content]').text()
|
||||
.replace(removeWhitespace, " ") //Remove all whitespace
|
||||
.replace(/\W\s/g, '') //Remove all non alphanumeric characters
|
||||
.substring(0,3000) //Limit to 3000 characters
|
||||
// .replace(/\W\s/g, '') //Remove all non alphanumeric characters
|
||||
.substring(0,6000) //Limit to 6000 characters
|
||||
.toLowerCase()
|
||||
.replace(/[^A-Za-z0-9- ]/g, '');
|
||||
|
||||
|
||||
console.log(majorContent)
|
||||
|
||||
//Count frequency of each word in scraped text
|
||||
let frequency = {}
|
||||
majorContent.split(' ').forEach(word => {
|
||||
if(commonWords.includes(word)){
|
||||
return //Exclude certain words
|
||||
// Exclude short or common words
|
||||
if(commonWords.includes(word) || word.length <= 2){
|
||||
return
|
||||
}
|
||||
if(!frequency[word]){
|
||||
frequency[word] = 0
|
||||
}
|
||||
// Skip some plurals
|
||||
if(frequency[word+'s'] || frequency[word+'es']){
|
||||
return
|
||||
}
|
||||
frequency[word]++
|
||||
})
|
||||
|
||||
@@ -192,7 +201,7 @@ SiteScrape.getKeywords = ($) => {
|
||||
});
|
||||
|
||||
let finalWords = []
|
||||
for(let i=0; i<5; i++){
|
||||
for(let i=0; i<6; i++){
|
||||
if(sortable[i] && sortable[i][0]){
|
||||
finalWords.push(sortable[i][0])
|
||||
}
|
||||
|
Reference in New Issue
Block a user