Compare commits
No commits in common. "7c15427b3d4cde2119c6558ba94d950371b78d19" and "c11f1b1b6ff2dced99ac1fb25ecc49f31f01c099" have entirely different histories.
7c15427b3d
...
c11f1b1b6f
2
.gitignore
vendored
2
.gitignore
vendored
@ -6,4 +6,4 @@ pids
|
|||||||
*.pid
|
*.pid
|
||||||
*.seed
|
*.seed
|
||||||
*.pid.lock
|
*.pid.lock
|
||||||
.env
|
|
||||||
|
@ -10,49 +10,10 @@
|
|||||||
<meta name="theme-color" content="#000" />
|
<meta name="theme-color" content="#000" />
|
||||||
<link rel="manifest" href="/api/static/assets/manifest.json">
|
<link rel="manifest" href="/api/static/assets/manifest.json">
|
||||||
|
|
||||||
<title>Solid Scribe - A Note Taking Website</title>
|
<title>Notes</title>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div id="app">
|
<div id="app"></div>
|
||||||
<!-- placeholder data for scrapers with no JS -->
|
|
||||||
<style>
|
|
||||||
|
|
||||||
.centered {
|
|
||||||
position: fixed;
|
|
||||||
top: 50%;
|
|
||||||
left: 50%;
|
|
||||||
transform: translate(-50%, -50%);
|
|
||||||
text-align: center;
|
|
||||||
font-family: Arial, Helvetica, sans-serif;
|
|
||||||
}
|
|
||||||
.logo {
|
|
||||||
width: 200px;
|
|
||||||
height: auto;
|
|
||||||
}
|
|
||||||
.scrape-info {
|
|
||||||
opacity: 0;
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
|
|
||||||
<div class="centered">
|
|
||||||
<img class="logo" src="/api/static/assets/logo.svg" alt="logo">
|
|
||||||
<h1>Solid Scribe</h1>
|
|
||||||
<h3>Loading...</h3>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="scrape-info">
|
|
||||||
<h1>Solid Scribe</h1>
|
|
||||||
<h2>A note application that respects your privacy.</h2>
|
|
||||||
<p>Take notes with a clean editor that works on desktop or mobile.</p>
|
|
||||||
<p>Search notes, links and files to find what you need.</p>
|
|
||||||
<p>Accessable everywhere.</p>
|
|
||||||
<p>Categorize notes with tags.</p>
|
|
||||||
<p>Share data with fellow users.</p>
|
|
||||||
<p>Encrypt notes for additional security.</p>
|
|
||||||
<b>This site requires Javascipt to run.</b>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
<!-- built files will be auto injected -->
|
<!-- built files will be auto injected -->
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
@ -2,16 +2,17 @@
|
|||||||
<div class="ui basic segment">
|
<div class="ui basic segment">
|
||||||
<div class="ui grid">
|
<div class="ui grid">
|
||||||
<div class="sixteen wide column">
|
<div class="sixteen wide column">
|
||||||
<div class="ui text container">
|
|
||||||
|
|
||||||
<h2 class="ui dividing header">
|
<h2 class="ui dividing header">
|
||||||
Help
|
Help
|
||||||
</h2>
|
</h2>
|
||||||
|
</div>
|
||||||
|
<div class="sixteen wide column">
|
||||||
|
<div class="ui text container">
|
||||||
|
|
||||||
<!-- Content copied from note -->
|
<!-- Content copied from note -->
|
||||||
<!-- https://www.solidscribe.com/#/notes/open/552 -->
|
<!-- https://www.solidscribe.com/#/notes/open/552 -->
|
||||||
|
|
||||||
<p><b>Quick Note</b><br></p><p>The Quick note feature was designed to allow rapid input to a single note. Rather than junking up all your notes with random links, numbers or haikus, you can put them all in one place. <br></p><p>All data pushed to the quick note can still be edited like a normal note.<br></p><p><br></p><p><b>Dark Theme</b><br></p><p>Dark theme was designed to minimize the amount of blue. Less blue entering your eyes is supposed to help you fall asleep.<br></p><p>Most things turn sepia and a filter is applied to images to make them more sepia.<br></p><p>Here is some good research on the topic: <a href="https://justgetflux.com/research.html">https://justgetflux.com/research.html</a><br></p><p><br></p><p><b>Password Protected Notes</b><br></p><p>Note protected with a password are encrypted. This means the data is scrambled and unreadable unless the correct password is used to decrypt them.<br></p><p>If a password is forgotten, it can never be recovered. Passwords are not saved for encrypted notes. If you lose the password to a protected note, that note text is lost. <br></p><p>Only the text of the note is protected. Tags, Files attached to the note, and the title of the note are still visible without a password. You can not search text in a password protected note. But you can search by the title.<br></p><p><br></p><p><b>Links in notes</b><br></p><p>Links put into notes are automatically scraped. This means the data from the link will be scanned to get an image and some text from the website to help make that link more accessible in the future. <br></p><p><br></p><p><b>Files in notes</b><br></p><p>Files can be uploaded to notes. If its an image, the picture will be put into the note.<br></p><p>Images added to notes will have the text pulled out so it can be searched (This isn't super accurate so don't rely to heavily on it.) The text can be updated at any time.<br></p><p><br></p><p><b>Deleting notes</b><br></p><p>When<b> </b>notes are deleted, none of the files related to the note are deleted. <br></p><p><br></p><p><b>Daily Backups</b><br></p><p>All notes are backed up, every night, at midnight. If there is data loss, it can be restored from a backup. If you experience some sort of cataclysmic data loss please contact the system administrator for a copy of your data or restoration a restoration procedure. <br></p>
|
<p><b>Quick Note</b><br></p><p>The Quick note feature was designed to allow rapid input to a single note. Rather than junking up all your notes with random links, numbers or haikus, you can put them all in one place. <br></p><p>All data pushed to the quick note can still be edited like a normal note.<br></p><p><br></p><p><b>Dark Theme</b><br></p><p>Dark theme was designed to minimize the amount of blue. Less blue entering your eyes is supposed to help you fall asleep.<br></p><p>Most things turn sepia and a filter is applied to images to make them more sepia.<br></p><p>Here is some good research on the topic: <a href="https://justgetflux.com/research.html">https://justgetflux.com/research.html</a><br></p><p><br></p><p><b>Password Protected Notes</b><br></p><p>Note protected with a password are encrypted. This means the data is scrambled and unreadable unless the correct password is used to decrypt them.<br></p><p>If a password is forgotten, it can never be recovered. Passwords are not saved for encrypted notes. If you lose the password to a protected note, that note text is lost. <br></p><p>Only the text of the note is protected. Tags, Files attached to the note, and the title of the note are still visible without a password. You can not search text in a password protected note. But you can search by the title.<br></p><p><br></p><p><b>Links in notes</b><br></p><p>Links put into notes are automatically scraped. This means the data from the link will be scanned to get an image and some text from the website to help make that link more accessible in the future. <br></p><p><br></p><p><b>Files in notes</b><br></p><p>Files can be uploaded to notes. If its an image, the picture will be put into the note.<br></p><p>Images added to notes will have the text pulled out so it can be searched (This isn't super accurate so don't rely to heavily on it.) The text can be updated at any time.<br></p><p><br></p><p><b>Deleting notes</b><br></p><p>When notes are deleted, none of the files related to the note are deleted.<br></p>
|
||||||
|
|
||||||
<!-- content copied from note -->
|
<!-- content copied from note -->
|
||||||
</div>
|
</div>
|
||||||
|
@ -133,10 +133,6 @@ export default new Vuex.Store({
|
|||||||
.then( ({data}) => {
|
.then( ({data}) => {
|
||||||
commit('setUserTotals', data)
|
commit('setUserTotals', data)
|
||||||
})
|
})
|
||||||
.catch( error => {
|
|
||||||
commit('destroyLoginToken')
|
|
||||||
location.reload()
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
5
package-lock.json
generated
5
package-lock.json
generated
@ -487,11 +487,6 @@
|
|||||||
"resolved": "https://registry.npmjs.org/dont-sniff-mimetype/-/dont-sniff-mimetype-1.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/dont-sniff-mimetype/-/dont-sniff-mimetype-1.1.0.tgz",
|
||||||
"integrity": "sha512-ZjI4zqTaxveH2/tTlzS1wFp+7ncxNZaIEWYg3lzZRHkKf5zPT/MnEG6WL0BhHMJUabkh8GeU5NL5j+rEUCb7Ug=="
|
"integrity": "sha512-ZjI4zqTaxveH2/tTlzS1wFp+7ncxNZaIEWYg3lzZRHkKf5zPT/MnEG6WL0BhHMJUabkh8GeU5NL5j+rEUCb7Ug=="
|
||||||
},
|
},
|
||||||
"dotenv": {
|
|
||||||
"version": "8.2.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-8.2.0.tgz",
|
|
||||||
"integrity": "sha512-8sJ78ElpbDJBHNeBzUbUVLsqKdccaa/BXF1uPTw3GrvQTBgrQrtObr2mUrE38vzYd8cEv+m/JBfDLioYcfXoaw=="
|
|
||||||
},
|
|
||||||
"ecc-jsbn": {
|
"ecc-jsbn": {
|
||||||
"version": "0.1.2",
|
"version": "0.1.2",
|
||||||
"resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz",
|
||||||
|
@ -11,7 +11,6 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"body-parser": "^1.18.3",
|
"body-parser": "^1.18.3",
|
||||||
"cheerio": "^1.0.0-rc.3",
|
"cheerio": "^1.0.0-rc.3",
|
||||||
"dotenv": "^8.2.0",
|
|
||||||
"express": "^4.16.4",
|
"express": "^4.16.4",
|
||||||
"express-rate-limit": "^5.1.1",
|
"express-rate-limit": "^5.1.1",
|
||||||
"gm": "^1.23.1",
|
"gm": "^1.23.1",
|
||||||
|
@ -3,9 +3,9 @@ const mysql = require('mysql2');
|
|||||||
|
|
||||||
// Create the connection pool.
|
// Create the connection pool.
|
||||||
const pool = mysql.createPool({
|
const pool = mysql.createPool({
|
||||||
host: process.env.DB_HOST,
|
host: 'localhost',
|
||||||
user: process.env.DB_USER,
|
user: 'dev',
|
||||||
password: process.env.DB_PASS,
|
password: "LazaLinga&33Can't!Do!That34",
|
||||||
database: 'application',
|
database: 'application',
|
||||||
waitForConnections: true,
|
waitForConnections: true,
|
||||||
connectionLimit: 20,
|
connectionLimit: 20,
|
||||||
|
@ -2,16 +2,16 @@ var jwt = require('jsonwebtoken');
|
|||||||
|
|
||||||
let Auth = {}
|
let Auth = {}
|
||||||
|
|
||||||
const tokenSecretKey = process.env.JSON_KEY
|
const secretKey = '@TODO define secret constant its important!!!'
|
||||||
|
|
||||||
Auth.createToken = (userId) => {
|
Auth.createToken = (userId) => {
|
||||||
const signedData = {'id': userId, 'date':Date.now()}
|
const signedData = {'id': userId, 'date':Date.now()}
|
||||||
const token = jwt.sign(signedData, tokenSecretKey)
|
const token = jwt.sign(signedData, secretKey)
|
||||||
return token
|
return token
|
||||||
}
|
}
|
||||||
Auth.decodeToken = (token) => {
|
Auth.decodeToken = (token) => {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
jwt.verify(token, tokenSecretKey, function(err, decoded){
|
jwt.verify(token, secretKey, function(err, decoded){
|
||||||
if(err || decoded.id == undefined){
|
if(err || decoded.id == undefined){
|
||||||
reject('Bad Token')
|
reject('Bad Token')
|
||||||
return
|
return
|
||||||
|
@ -1,151 +0,0 @@
|
|||||||
let SiteScrape = module.exports = {}
|
|
||||||
|
|
||||||
//
|
|
||||||
// $ = the cheerio scrape object
|
|
||||||
//
|
|
||||||
|
|
||||||
const removeWhitespace = /\s+/g
|
|
||||||
|
|
||||||
const commonWords = ['share','facebook','twitter','reddit','be','have','do','say','get','make','go','know','take','see','come','think','look','want',
|
|
||||||
'give','use','find','tell','ask','work','seem','feel','try','leave','call','good','new','first','last','long','great','little','own','other','old',
|
|
||||||
'right','big','high','different','small','large','next','early','young','important','few','public','bad','same','able','to','of','in','for','on',
|
|
||||||
'with','at','by','from','up','about','into','over','after','the','and','a','that','I','it','not','he','as','you','this','but','his','they','her',
|
|
||||||
'she','or','an','will','my','one','all','would','there','their','and','that','but','or','as','if','when','than','because','while','where','after',
|
|
||||||
'so','though','since','until','whether','before','although','nor','like','once','unless','now','except','are','also','is','your','its']
|
|
||||||
|
|
||||||
SiteScrape.getTitle = ($) => {
|
|
||||||
|
|
||||||
let title = $('title').text().replace(removeWhitespace, " ")
|
|
||||||
return title
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
//Finds all urls in text, removes duplicates, makes sure they have https://
|
|
||||||
SiteScrape.getCleanUrls = (textBlock) => {
|
|
||||||
//Find all URLs in text
|
|
||||||
//@TODO - Use the process text library for this function
|
|
||||||
const urlPattern = /(?:(?:https?|ftp|file):\/\/|www\.|ftp\.)(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[-A-Z0-9+&@#/%=~_|$?!:,.])*(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[A-Z0-9+&@#/%=~_|$])/igm
|
|
||||||
let allUrls = textBlock.match(urlPattern)
|
|
||||||
|
|
||||||
if(allUrls == null){
|
|
||||||
return []
|
|
||||||
}
|
|
||||||
|
|
||||||
//Every URL needs HTTPS!!!
|
|
||||||
let foundUrls = []
|
|
||||||
allUrls.forEach( (item, index) => {
|
|
||||||
//add protocol if it is missing
|
|
||||||
if(item.indexOf('https://') == -1 && item.indexOf('http://') == -1){
|
|
||||||
allUrls[index] = 'https://'+item
|
|
||||||
}
|
|
||||||
//convert http to https
|
|
||||||
if(item.indexOf('http://') >= 0){
|
|
||||||
allUrls[index] = item.replace('http://','https://')
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
//Remove all duplicates
|
|
||||||
foundUrls = [...new Set(allUrls)]
|
|
||||||
|
|
||||||
return foundUrls
|
|
||||||
}
|
|
||||||
|
|
||||||
//Site hostname with https:// eg: https://www.google.com
|
|
||||||
SiteScrape.getHostName = (url) => {
|
|
||||||
|
|
||||||
var hostname = 'https://'+(new URL(url)).hostname;
|
|
||||||
console.log('hostname', hostname)
|
|
||||||
return hostname
|
|
||||||
}
|
|
||||||
|
|
||||||
// URL for image that can be downloaded to represent website
|
|
||||||
SiteScrape.getDisplayImage = ($, url) => {
|
|
||||||
|
|
||||||
const hostname = SiteScrape.getHostName(url)
|
|
||||||
|
|
||||||
let metaImg = $('meta[property="og:image"]')
|
|
||||||
let shortcutIcon = $('link[rel="shortcut icon"]')
|
|
||||||
let favicon = $('link[rel="icon"]')
|
|
||||||
let randomImg = $('img')
|
|
||||||
|
|
||||||
console.log('----')
|
|
||||||
|
|
||||||
//Scrape metadata for page image
|
|
||||||
//Grab the first random image we find
|
|
||||||
if(randomImg && randomImg[0] && randomImg[0].attribs){
|
|
||||||
thumbnail = hostname + randomImg[0].attribs.src
|
|
||||||
console.log('random img '+thumbnail)
|
|
||||||
}
|
|
||||||
//Grab the favicon of the site
|
|
||||||
if(favicon && favicon[0] && favicon[0].attribs){
|
|
||||||
thumbnail = hostname + favicon[0].attribs.href
|
|
||||||
console.log('favicon '+thumbnail)
|
|
||||||
}
|
|
||||||
//Grab the shortcut icon
|
|
||||||
if(shortcutIcon && shortcutIcon[0] && shortcutIcon[0].attribs){
|
|
||||||
thumbnail = hostname + shortcutIcon[0].attribs.href
|
|
||||||
console.log('shortcut '+thumbnail)
|
|
||||||
}
|
|
||||||
//Grab the presentation image for the site
|
|
||||||
if(metaImg && metaImg[0] && metaImg[0].attribs){
|
|
||||||
thumbnail = metaImg[0].attribs.content
|
|
||||||
console.log('ogImg '+thumbnail)
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log('-----')
|
|
||||||
return thumbnail
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get all the site text and parse out the words that appear most
|
|
||||||
SiteScrape.getKeywords = ($) => {
|
|
||||||
|
|
||||||
let majorContent = ''
|
|
||||||
|
|
||||||
majorContent += $('[class*=content]').text()
|
|
||||||
.replace(removeWhitespace, " ") //Remove all whitespace
|
|
||||||
.replace(/\W\s/g, '') //Remove all non alphanumeric characters
|
|
||||||
.substring(0,3000) //Limit to 3000 characters
|
|
||||||
.toLowerCase()
|
|
||||||
|
|
||||||
//Count frequency of each word in scraped text
|
|
||||||
let frequency = {}
|
|
||||||
majorContent.split(' ').forEach(word => {
|
|
||||||
if(commonWords.includes(word)){
|
|
||||||
return //Exclude certain words
|
|
||||||
}
|
|
||||||
if(!frequency[word]){
|
|
||||||
frequency[word] = 0
|
|
||||||
}
|
|
||||||
frequency[word]++
|
|
||||||
})
|
|
||||||
|
|
||||||
//Create a sortable array
|
|
||||||
var sortable = [];
|
|
||||||
for (var index in frequency) {
|
|
||||||
if(frequency[index] > 1){
|
|
||||||
sortable.push([index, frequency[index]]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//Sort them by most used words in the list
|
|
||||||
sortable.sort(function(a, b) {
|
|
||||||
return b[1] - a[1];
|
|
||||||
});
|
|
||||||
|
|
||||||
let finalWords = []
|
|
||||||
for(let i=0; i<5; i++){
|
|
||||||
if(sortable[i] && sortable[i][0]){
|
|
||||||
finalWords.push(sortable[i][0])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(finalWords.length > 0){
|
|
||||||
return 'Keywords: ' + finalWords.join(', ')
|
|
||||||
}
|
|
||||||
return ''
|
|
||||||
}
|
|
||||||
|
|
||||||
SiteScrape.getMainText = ($) => {}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,17 +1,11 @@
|
|||||||
//Set up environmental variables, pulled from .env file used as process.env.DB_HOST
|
|
||||||
const os = require('os') //Used to get path of home directory
|
|
||||||
const result = require('dotenv').config({ path:(os.homedir()+'/.env') })
|
|
||||||
|
|
||||||
//Allow user of @ in in require calls. Config in package.json
|
//Allow user of @ in in require calls. Config in package.json
|
||||||
require('module-alias/register')
|
require('module-alias/register')
|
||||||
|
|
||||||
//Auth helper, used for decoding users web token
|
|
||||||
let Auth = require('@helpers/Auth')
|
let Auth = require('@helpers/Auth')
|
||||||
|
|
||||||
//Helmet adds additional security to express server
|
|
||||||
const helmet = require('helmet')
|
const helmet = require('helmet')
|
||||||
|
|
||||||
//Setup express server
|
|
||||||
const express = require('express')
|
const express = require('express')
|
||||||
const app = express()
|
const app = express()
|
||||||
app.use( helmet() )
|
app.use( helmet() )
|
||||||
@ -134,18 +128,6 @@ app.use(function(req, res, next){
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
// Testing Area
|
|
||||||
// let att = require('@models/Attachment')
|
|
||||||
// let testUrl = 'https://dba.stackexchange.com/questions/23908/how-to-search-a-mysql-database-with-encrypted-fields'
|
|
||||||
// testUrl = 'https://www.solidscribe.com/#/'
|
|
||||||
// console.log('About to scrape: ', testUrl)
|
|
||||||
// att.processUrl(61, 3213, testUrl)
|
|
||||||
// .then(results => {
|
|
||||||
// console.log('Scrape happened')
|
|
||||||
// })
|
|
||||||
//
|
|
||||||
//
|
|
||||||
|
|
||||||
//Test
|
//Test
|
||||||
app.get(prefix, (req, res) => res.send('The api is running'))
|
app.get(prefix, (req, res) => res.send('The api is running'))
|
||||||
|
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
let db = require('@config/database')
|
let db = require('@config/database')
|
||||||
|
|
||||||
let SiteScrape = require('@helpers/SiteScrape')
|
|
||||||
|
|
||||||
let Attachment = module.exports = {}
|
let Attachment = module.exports = {}
|
||||||
|
|
||||||
const cheerio = require('cheerio')
|
const cheerio = require('cheerio')
|
||||||
@ -244,8 +242,32 @@ Attachment.scanTextForWebsites = (io, userId, noteId, noteText) => {
|
|||||||
|
|
||||||
Attachment.urlForNote(userId, noteId).then(attachments => {
|
Attachment.urlForNote(userId, noteId).then(attachments => {
|
||||||
|
|
||||||
//Pull all the URLs out of the text
|
//Find all URLs in text
|
||||||
let foundUrls = SiteScrape.getCleanUrls(noteText)
|
//@TODO - Use the process text library for this function
|
||||||
|
const urlPattern = /(?:(?:https?|ftp|file):\/\/|www\.|ftp\.)(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[-A-Z0-9+&@#/%=~_|$?!:,.])*(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[A-Z0-9+&@#/%=~_|$])/igm
|
||||||
|
let allUrls = noteText.match(urlPattern)
|
||||||
|
|
||||||
|
if(allUrls == null){
|
||||||
|
allUrls = []
|
||||||
|
}
|
||||||
|
|
||||||
|
//Every URL needs HTTPS!!!
|
||||||
|
let foundUrls = []
|
||||||
|
allUrls.forEach( (item, index) => {
|
||||||
|
//Every URL should have HTTPS
|
||||||
|
if(item.indexOf('https://') == -1 && item.indexOf('http://') == -1){
|
||||||
|
allUrls[index] = 'https://'+item
|
||||||
|
}
|
||||||
|
//URLs should all have HTTPS!!!
|
||||||
|
if(item.indexOf('http://') >= 0){
|
||||||
|
allUrls[index] = item.replace('http://','https://')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
//Remove all duplicates
|
||||||
|
foundUrls = [...new Set(allUrls)]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//Go through each saved URL, remove new URLs from saved URLs
|
//Go through each saved URL, remove new URLs from saved URLs
|
||||||
//If a URL is not found, delete it
|
//If a URL is not found, delete it
|
||||||
@ -364,6 +386,14 @@ Attachment.processUrl = (userId, noteId, url) => {
|
|||||||
|
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
|
|
||||||
|
const excludeWords = ['share','facebook','twitter','reddit','be','have','do','say','get','make','go','know','take','see','come','think','look','want',
|
||||||
|
'give','use','find','tell','ask','work','seem','feel','try','leave','call','good','new','first','last','long','great','little','own','other','old',
|
||||||
|
'right','big','high','different','small','large','next','early','young','important','few','public','bad','same','able','to','of','in','for','on',
|
||||||
|
'with','at','by','from','up','about','into','over','after','the','and','a','that','I','it','not','he','as','you','this','but','his','they','her',
|
||||||
|
'she','or','an','will','my','one','all','would','there','their','and','that','but','or','as','if','when','than','because','while','where','after',
|
||||||
|
'so','though','since','until','whether','before','although','nor','like','once','unless','now','except','are','also','is','your','its']
|
||||||
|
|
||||||
|
var removeWhitespace = /\s+/g
|
||||||
|
|
||||||
const options = {
|
const options = {
|
||||||
uri: url,
|
uri: url,
|
||||||
@ -398,33 +428,70 @@ Attachment.processUrl = (userId, noteId, url) => {
|
|||||||
})
|
})
|
||||||
.then($ => {
|
.then($ => {
|
||||||
|
|
||||||
//Clear timeout that would end this function
|
|
||||||
clearTimeout(requestTimeout)
|
clearTimeout(requestTimeout)
|
||||||
|
|
||||||
|
var desiredSearchText = ''
|
||||||
|
|
||||||
|
let pageTitle = $('title').text().replace(removeWhitespace, " ")
|
||||||
|
desiredSearchText += pageTitle + "\n"
|
||||||
|
|
||||||
// let header = $('h1').text().replace(removeWhitespace, " ")
|
// let header = $('h1').text().replace(removeWhitespace, " ")
|
||||||
// desiredSearchText += header + "\n"
|
// desiredSearchText += header + "\n"
|
||||||
|
|
||||||
const pageTitle = SiteScrape.getTitle($)
|
//Scrape metadata for page image
|
||||||
|
let metadata = $('meta[property="og:image"]')
|
||||||
|
if(metadata && metadata[0] && metadata[0].attribs){
|
||||||
|
thumbnail = metadata[0].attribs.content
|
||||||
|
}
|
||||||
|
|
||||||
const hostname = SiteScrape.getHostName(url)
|
|
||||||
|
|
||||||
const thumbnail = SiteScrape.getDisplayImage($, url)
|
let majorContent = ''
|
||||||
|
majorContent += $('[class*=content]').text()
|
||||||
|
.replace(removeWhitespace, " ") //Remove all whitespace
|
||||||
|
.replace(/\W\s/g, '') //Remove all non alphanumeric characters
|
||||||
|
.substring(0,3000)
|
||||||
|
.toLowerCase()
|
||||||
|
majorContent += $('[id*=content]').text().replace(removeWhitespace, " ")
|
||||||
|
.replace(removeWhitespace, " ") //Remove all whitespace
|
||||||
|
.replace(/\W\s/g, '') //Remove all non alphanumeric characters
|
||||||
|
.substring(0,3000) //Limit characters
|
||||||
|
.toLowerCase()
|
||||||
|
|
||||||
const keywords = SiteScrape.getKeywords($)
|
//Count frequency of each word in scraped text
|
||||||
|
let frequency = {}
|
||||||
var desiredSearchText = ''
|
majorContent.split(' ').forEach(word => {
|
||||||
desiredSearchText += pageTitle + "\n"
|
if(excludeWords.includes(word)){
|
||||||
desiredSearchText += keywords
|
return //Exclude certain words
|
||||||
|
}
|
||||||
console.log({
|
if(!frequency[word]){
|
||||||
pageTitle,
|
frequency[word] = 0
|
||||||
hostname,
|
}
|
||||||
thumbnail,
|
frequency[word]++
|
||||||
keywords
|
|
||||||
})
|
})
|
||||||
|
|
||||||
|
//Create a sortable array
|
||||||
|
var sortable = [];
|
||||||
|
for (var index in frequency) {
|
||||||
|
if(frequency[index] > 1){
|
||||||
|
sortable.push([index, frequency[index]]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// throw new Error('Ending this function early.')
|
//Sort them by most used words in the list
|
||||||
|
sortable.sort(function(a, b) {
|
||||||
|
return b[1] - a[1];
|
||||||
|
});
|
||||||
|
|
||||||
|
let finalWords = []
|
||||||
|
for(let i=0; i<5; i++){
|
||||||
|
if(sortable[i] && sortable[i][0]){
|
||||||
|
finalWords.push(sortable[i][0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(finalWords.length > 0){
|
||||||
|
desiredSearchText += 'Keywords: ' + finalWords.join(', ')
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// console.log('TexT Scraped')
|
// console.log('TexT Scraped')
|
||||||
@ -465,10 +532,9 @@ Attachment.processUrl = (userId, noteId, url) => {
|
|||||||
|
|
||||||
})
|
})
|
||||||
.catch(error => {
|
.catch(error => {
|
||||||
// console.log('Scrape pooped out')
|
console.log('Issue with scrape')
|
||||||
// console.log('Issue with scrape')
|
|
||||||
console.log(error)
|
console.log(error)
|
||||||
// resolve('')
|
resolve('')
|
||||||
})
|
})
|
||||||
|
|
||||||
requestTimeout = setTimeout( () => {
|
requestTimeout = setTimeout( () => {
|
||||||
|
@ -167,10 +167,10 @@ Note.update = (io, userId, noteId, noteText, noteTitle, color, pinned, archived,
|
|||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
|
|
||||||
//Prevent note loss if it saves with empty text
|
//Prevent note loss if it saves with empty text
|
||||||
//if(ProcessText.removeHtml(noteText) == ''){
|
if(ProcessText.removeHtml(noteText) == ''){
|
||||||
// console.log('Not saving empty note')
|
// console.log('Not saving empty note')
|
||||||
// resolve(false)
|
// resolve(false)
|
||||||
//}
|
}
|
||||||
|
|
||||||
const now = Math.round((+new Date)/1000)
|
const now = Math.round((+new Date)/1000)
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ var multer = require('multer')
|
|||||||
var upload = multer({ dest: '../staticFiles/' }) //@TODO make this a global value
|
var upload = multer({ dest: '../staticFiles/' }) //@TODO make this a global value
|
||||||
let router = express.Router()
|
let router = express.Router()
|
||||||
|
|
||||||
let Attachment = require('@models/Attachment')
|
let Attachment = require('@models/Attachment');
|
||||||
let Note = require('@models/Note')
|
let Note = require('@models/Note')
|
||||||
let userId = null
|
let userId = null
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user