Searching, url indexing
* Added a help page * Cleaned up home and login pages * Menu is hidden when on the notes section of the app * Added username to login data * Notes now change to the color selected for the note * Note save function has a 500ms debounce to prevent spamming * Solr results now display content from notes, tags and attachments * All note data is now indexed in Solr * Notes containing URLs are now scraped and put into tag solr index * Attachments are deleted when their URL is removed from the note * Minor tweaks and fixes all over the place
This commit is contained in:
190
server/models/Attachment.js
Normal file
190
server/models/Attachment.js
Normal file
@@ -0,0 +1,190 @@
|
||||
let db = require('@config/database')
|
||||
|
||||
let Attachment = module.exports = {}
|
||||
|
||||
const cheerio = require('cheerio');
|
||||
const rp = require('request-promise');
|
||||
|
||||
//Look up the URL attachments (attachment_type = 1) stored for one note of one user.
//
// userId - owner of the attachments
// noteId - note the attachments belong to
//
//Resolves with the first element of the query result, which callers iterate as the
//list of attachment rows. A failed query is only logged, the promise then never settles.
Attachment.forNote = (userId, noteId) => {
    const lookupSql = `SELECT * FROM attachment WHERE user_id = ? AND note_id = ? AND attachment_type = 1;`
    const lookupParams = [userId, noteId]

    return new Promise((resolve) => {
        db.promise()
            .query(lookupSql, lookupParams)
            .then(result => {
                resolve(result[0]) //Return all attachments found by query
            })
            .catch(console.log)
    })
}
|
||||
|
||||
//Remove a single attachment row by its id.
//
// attachmentId - id of the attachment row to delete
//
//Resolves with the first element of the query result once the DELETE has run.
//A failed query is only logged, the promise then never settles.
Attachment.delete = (attachmentId) => {
    const deleteSql = `DELETE FROM attachment WHERE id = ?`

    return new Promise((resolve) => {
        db.promise()
            .query(deleteSql, [attachmentId])
            .then(result => {
                resolve(result[0]) //Hand back the outcome of the delete
            })
            .catch(console.log)
    })
}
|
||||
|
||||
//Bring a note's URL attachments in line with the URLs currently in its text and
//build the attachment text that is indexed together with the note.
//
// - a URL that already has an attachment reuses the stored scrape text
// - an attachment whose URL is no longer in the note is deleted
// - a URL without an attachment is scraped through scrapeUrlsCreateAttachments
//
// userId   - owner of the note
// noteId   - note being processed
// noteText - current text of the note, may be empty
//
//Resolves with a string ('' when there is nothing to index). Errors thrown while
//reconciling are logged and the text gathered so far is returned instead.
Attachment.scanTextForWebsites = (userId, noteId, noteText) => {

    //Nothing to scan, stop here instead of falling through to the lookup
    if(typeof noteText !== 'string' || noteText.length === 0){
        return Promise.resolve('')
    }

    let solrAttachmentText = '' //Final searchable scrape text for note

    return Attachment.forNote(userId, noteId)
    .then(attachments => {

        //Find all URLs in text
        const urlPattern = /(?:(?:https?|ftp|file):\/\/|www\.|ftp\.)(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[-A-Z0-9+&@#/%=~_|$?!:,.])*(?:\([-A-Z0-9+&@#/%=~_|$?!:,.]*\)|[A-Z0-9+&@#/%=~_|$])/igm

        //match() gives null when the note holds no URL at all
        //The Set drops repeats so a URL written twice is only scraped once
        const pendingUrls = new Set(noteText.match(urlPattern) || [])

        //Go through each attachment, check for existing URLs
        const existingAttachments = attachments || []
        existingAttachments.forEach(attachment => {
            if(pendingUrls.has(attachment.url)){
                //URL already scraped, push text and continue
                solrAttachmentText += attachment.text
                pendingUrls.delete(attachment.url) //Remove existing from set of found
            } else {
                //URL is gone from the note (or this row is a duplicate), drop the attachment
                Attachment.delete(attachment.id)
            }
        })

        //No newly found URLs, looked up attachment text is the result
        if(pendingUrls.size === 0){
            return solrAttachmentText
        }

        //Process the remaining URLs into attachments
        return Attachment.scrapeUrlsCreateAttachments(userId, noteId, Array.from(pendingUrls))
        .then(freshlyScrapedText => {
            solrAttachmentText += freshlyScrapedText
            return solrAttachmentText
        })
    })
    .catch(error => {
        //Keep the caller going with whatever text was collected before the failure
        console.log(error)
        return solrAttachmentText
    })
}
|
||||
|
||||
//Scrape every URL in foundUrls and return the combined scraped text.
//
// userId    - owner of the note
// noteId    - note the URLs were found in
// foundUrls - array of URL strings, null or empty yields ''
//
//Each URL is handed to Attachment.processUrl. Resolves, once every processUrl
//promise has settled, with the scraped texts concatenated in the order of foundUrls.
//A URL whose processing rejects is logged and adds nothing to the result.
Attachment.scrapeUrlsCreateAttachments = (userId, noteId, foundUrls) => {

    //Nothing to scrape, return right away
    if(!Array.isArray(foundUrls) || foundUrls.length === 0){
        return Promise.resolve('')
    }

    //Process each URL passed to function, a DB entry will be created for each scrape
    const scrapes = foundUrls.map(url => {
        return Attachment.processUrl(userId, noteId, url)
        .catch(error => {
            //One bad URL must not hold back the text of the others
            console.log(error)
            return ''
        })
    })

    //All URLs have been scraped, return data
    return Promise.all(scrapes).then(scrapedTexts => scrapedTexts.join(''))
}
|
||||
|
||||
|
||||
//Words skipped when picking keywords from a scraped page. They are compared against
//lower cased page text, so every entry has to be lower case
const scrapeExcludeWords = new Set(['share','facebook','twitter','reddit','be','have','do','say','get','make','go','know','take','see','come','think','look','want',
    'give','use','find','tell','ask','work','seem','feel','try','leave','call','good','new','first','last','long','great','little','own','other','old',
    'right','big','high','different','small','large','next','early','young','important','few','public','bad','same','able','to','of','in','for','on',
    'with','at','by','from','up','about','into','over','after','the','and','a','that','i','it','not','he','as','you','this','but','his','they','her',
    'she','or','an','will','my','one','all','would','there','their','and','that','but','or','as','if','when','than','because','while','where','after',
    'so','though','since','until','whether','before','although','nor','like','once','unless','now','except','are','also','is','your','its'])

//Longest slice of cleaned text analysed per selector
const scrapeContentLimit = 3000

//Number of most frequent words kept as keywords
const scrapeKeywordLimit = 15

//Pick the URI to request for a URL found in a note, null when it can not be fetched over http(s)
const scrapeRequestUri = (url) => {
    if(typeof url !== 'string' || url.length === 0){
        return null
    }
    if(/^https?:\/\//i.test(url)){
        return url
    }
    if(/^[a-z][a-z0-9+.-]*:\/\//i.test(url)){
        return null //Some other scheme (ftp://, file://), not something to scrape
    }
    return 'http://' + url //Bare "www.example.com" style match, the request needs a scheme
}

//Collapse whitespace, strip punctuation that trails a word, cut to the limit and lower case
const scrapeCleanText = (rawText) => {
    return rawText
        .replace(/\s+/g, ' ')
        .replace(/\W\s/g, ' ') //Replace with a space, an empty string would glue neighbouring words together
        .substring(0, scrapeContentLimit)
        .toLowerCase()
}

//Return the words used more than once in text, most frequent first, capped at the keyword limit
const scrapeTopKeywords = (text) => {

    //Count frequency of each word, a Map keeps words like "constructor" from hitting Object.prototype
    const frequency = new Map()
    text.split(' ').forEach(word => {
        if(word === '' || scrapeExcludeWords.has(word)){
            return //Exclude empty tokens and certain words
        }
        frequency.set(word, (frequency.get(word) || 0) + 1)
    })

    //Sort them by most used words in the list
    return Array.from(frequency.entries())
        .filter(entry => entry[1] > 1)
        .sort((a, b) => b[1] - a[1])
        .slice(0, scrapeKeywordLimit) //slice copes with pages that have fewer keywords than the limit
        .map(entry => entry[0])
}

//Scrape one URL and store the result as an attachment (attachment_type 1) of the note.
//
// userId - owner of the note
// noteId - note the URL was found in
// url    - URL exactly as it appears in the note, stored unchanged in the attachment row
//
//The stored text is the page title, the h1 text and the most frequent words of the
//elements whose class or id contains "content".
//Resolves with that text. Resolves with '' when the URL is not http(s) or when the
//request or the insert fails (the error is logged).
//
//NOTE(review): the server requests whatever URL a note contains, internal hosts
//included - consider an allow/deny list before exposing this to untrusted users.
Attachment.processUrl = (userId, noteId, url) => {

    const requestUri = scrapeRequestUri(url)
    if(requestUri === null){
        return Promise.resolve('')
    }

    const options = {
        uri: requestUri,
        transform: function (body) {
            return cheerio.load(body)
        }
    }

    //Promise.resolve keeps the return value a native promise like before
    return Promise.resolve(rp(options))
    .then($ => {

        const removeWhitespace = /\s+/g
        const pageTitle = $('title').text().replace(removeWhitespace, " ")
        const header = $('h1').text().replace(removeWhitespace, " ")

        //The space keeps the last word of the first part apart from the first word of the second
        const majorContent = scrapeCleanText($('[class*=content]').text()) + ' ' + scrapeCleanText($('[id*=content]').text())

        const desiredSearchText = pageTitle + "\n" + header + "\n" + scrapeTopKeywords(majorContent).join(', ')

        const created = Math.round(Date.now()/1000)

        //Create attachment in DB with scrape text and provided data
        return db.promise()
        .query(`INSERT INTO attachment
            (note_id, user_id, attachment_type, text, url, last_indexed)
            VALUES (?, ?, ?, ?, ?, ?)`, [noteId, userId, 1, desiredSearchText, url, created])
        .then(() => desiredSearchText) //Return found text
    })
    .catch(error => {
        //Unreachable page, bad response or failed insert: log it and let the caller carry on
        console.log(error)
        return ''
    })
}
|
@@ -1,10 +1,26 @@
|
||||
let db = require('@config/database')
|
||||
|
||||
let Tags = require('@models/Tags')
|
||||
let Attachment = require('@models/Attachment')
|
||||
|
||||
var rp = require('request-promise');
|
||||
var SolrNode = require('solr-node');
|
||||
|
||||
let Notes = module.exports = {}
|
||||
|
||||
// Connection settings of the Solr core the notes are indexed in
const solrNoteCoreSettings = {
    protocol: 'http',
    host: '127.0.0.1',
    port: '8983',
    core: 'note'
}

// Solr client shared by the functions of this module
var client = new SolrNode(solrNoteCoreSettings);
|
||||
|
||||
Notes.create = (userId, noteText) => {
|
||||
return new Promise((resolve, reject) => {
|
||||
|
||||
if(userId == null || userId < 10){ reject('User Id required to create note') }
|
||||
|
||||
const created = Math.round((+new Date)/1000)
|
||||
|
||||
db.promise()
|
||||
@@ -24,6 +40,33 @@ Notes.update = (userId, noteId, noteText, fancyInput, color) => {
|
||||
db.promise()
|
||||
.query('UPDATE notes SET text = ?, raw_input = ?, updated = ?, color = ? WHERE id = ? AND user = ? LIMIT 1', [noteText, fancyInput, now, color, noteId, userId])
|
||||
.then((rows, fields) => {
|
||||
|
||||
//Process note text and attachment data
|
||||
Attachment.scanTextForWebsites(userId, noteId, noteText).then( attachmentText => {
|
||||
//
|
||||
// Update Solr index
|
||||
//
|
||||
Tags.string(userId, noteId).then(tagString => {
|
||||
// JSON Data
|
||||
var data = {
|
||||
'id': noteId,//string - ID of note
|
||||
'user_id': userId,//int
|
||||
'note_text': noteText,
|
||||
'notes_tags': tagString,
|
||||
'attachment_text': attachmentText,
|
||||
};
|
||||
// Update document to Solr server
|
||||
client.update(data, function(err, result) {
|
||||
if (err) { console.log(err); return; }
|
||||
console.log('Note Solr Update, node/solrid ('+noteId+'):');
|
||||
console.log(result.responseHeader)
|
||||
});
|
||||
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
//Send back updated response
|
||||
resolve(rows[0])
|
||||
})
|
||||
.catch(console.log)
|
||||
@@ -39,7 +82,7 @@ Notes.delete = (userId, noteId) => {
|
||||
Notes.get = (userId, noteId) => {
|
||||
return new Promise((resolve, reject) => {
|
||||
db.promise()
|
||||
.query('SELECT text, updated, raw_input FROM notes WHERE user = ? AND id = ? LIMIT 1', [userId,noteId])
|
||||
.query('SELECT text, updated, raw_input, color FROM notes WHERE user = ? AND id = ? LIMIT 1', [userId,noteId])
|
||||
.then((rows, fields) => {
|
||||
resolve(rows[0][0])
|
||||
})
|
||||
@@ -58,31 +101,36 @@ Notes.getLatest = (userId) => {
|
||||
})
|
||||
}
|
||||
|
||||
//Search a user's notes in Solr by note text, attachment text and tags.
//
// userId      - only documents with this user_id are searched
// searchQuery - text to look for, Solr query syntax is passed through
// searchTags  - accepted for symmetry with Notes.search, not used by the Solr query
//
//Resolves with the parsed Solr JSON response (`response.docs` carries the note ids,
//`highlighting` the snippets per note id). Resolves with [] when there is no search
//text or when the request or the parsing fails (the error is logged).
Notes.solrQuery = (userId, searchQuery, searchTags) => {

    const searchText = (searchQuery == null) ? '' : String(searchQuery).trim()

    //Nothing to search for
    if(searchText === ''){
        return Promise.resolve([])
    }

    //Build the query string with URLSearchParams so the search text is encoded and can
    //not cut the parameter list short ("&", "#", "+") or drag newlines into the URL
    const solrParams = new URLSearchParams({
        //Grouping applies each field to the whole search text, not only to its first word
        'q': `note_text:(${searchText}) OR attachment_text:(${searchText}) OR notes_tags:(${searchText})`,
        //The user restriction lives in a filter query, search text can not widen it
        'fq': `user_id:${userId}`,
        'wt': 'json',
        'fl': 'id',
        'hl': 'on',
        'hl.fl': 'note_text,attachment_text,notes_tags',
        'hl.snippets': '20',
        'hl.maxAnalyzedChars': '100000'
    })

    return Promise.resolve(rp('http://127.0.0.1:8983/solr/note/select?' + solrParams.toString()))
    .then(function (htmlString) {
        return JSON.parse(htmlString)
    })
    .catch(error => {
        //Solr unreachable, query rejected or unreadable response: answer like an empty search
        console.log(error)
        return []
    })
}
|
||||
|
||||
Notes.search = (userId, searchQuery, searchTags) => {
|
||||
return new Promise((resolve, reject) => {
|
||||
|
||||
|
||||
//Default note lookup gets all notes
|
||||
let noteSearchQuery = `
|
||||
SELECT notes.id, SUBSTRING(text, 1, 200) as text, updated, color
|
||||
FROM notes
|
||||
LEFT JOIN notes_tags ON (notes.id = notes_tags.note_id)
|
||||
WHERE user = ?`
|
||||
let searchParams = [userId]
|
||||
|
||||
if(searchQuery != ''){
|
||||
//If a search query is defined, search notes for that word
|
||||
searchParams.push('%'+searchQuery+'%')
|
||||
noteSearchQuery += ' AND text LIKE ?'
|
||||
}
|
||||
if(searchTags.length > 0){
|
||||
//If tags are passed, use those tags in search
|
||||
searchParams.push(searchTags)
|
||||
noteSearchQuery += ' AND notes_tags.tag_id IN (?)'
|
||||
}
|
||||
|
||||
//Finish up note query
|
||||
noteSearchQuery += ' GROUP BY notes.id ORDER BY updated DESC, created DESC, id DESC'
|
||||
|
||||
|
||||
//Define return data objects
|
||||
let returnData = {
|
||||
@@ -90,54 +138,113 @@ Notes.search = (userId, searchQuery, searchTags) => {
|
||||
'tags':[]
|
||||
}
|
||||
|
||||
db.promise()
|
||||
.query(noteSearchQuery, searchParams)
|
||||
.then((noteRows, noteFields) => {
|
||||
|
||||
//Push all notes
|
||||
returnData['notes'] = noteRows[0]
|
||||
Notes.solrQuery(userId, searchQuery, searchTags).then( solrResult => {
|
||||
|
||||
//pull out all note ids so we can fetch all tags for those notes
|
||||
let noteIds = []
|
||||
returnData['notes'].forEach(note => {
|
||||
let highlights = solrResult.highlighting
|
||||
|
||||
//Grab note ID for finding tags
|
||||
noteIds.push(note.id)
|
||||
|
||||
//Attempt to pull string out of first tag in note
|
||||
let reg = note.text.match(/<([\w]+)[^>]*>(.*?)<\/\1>/)
|
||||
if(reg != null){
|
||||
note.text = reg[2]
|
||||
}
|
||||
//Return all notes with HTML tags pulled out
|
||||
note.text = note.text
|
||||
.replace(/&[#A-Za-z0-9]+;/g,'') //Rip out all HTML entities
|
||||
.replace(/<[^>]+>/g, '') //Rip out all HTML tags
|
||||
|
||||
})
|
||||
//Parse Note ID's from solr search
|
||||
let solrNoteIds = []
|
||||
if(solrResult.response){
|
||||
solrResult.response.docs.forEach(item => {
|
||||
solrNoteIds.push(parseInt(item.id))
|
||||
})
|
||||
}
|
||||
|
||||
//If no notes are returned, there are no tags, return empty
|
||||
if(noteIds.length == 0){
|
||||
resolve(returnData)
|
||||
//Default note lookup gets all notes
|
||||
let noteSearchQuery = `
|
||||
SELECT notes.id, SUBSTRING(text, 1, 200) as text, updated, color
|
||||
FROM notes
|
||||
LEFT JOIN notes_tags ON (notes.id = notes_tags.note_id)
|
||||
WHERE user = ?`
|
||||
let searchParams = [userId]
|
||||
|
||||
if(solrNoteIds.length > 0){
|
||||
searchParams.push(solrNoteIds)
|
||||
noteSearchQuery += ' AND notes.id IN (?)'
|
||||
}
|
||||
|
||||
//Only show tags of selected notes
|
||||
// if(searchQuery != ''){
|
||||
// //If a search query is defined, search notes for that word
|
||||
// searchParams.push('%'+searchQuery+'%')
|
||||
// noteSearchQuery += ' AND text LIKE ?'
|
||||
// }
|
||||
if(searchTags.length > 0){
|
||||
//If tags are passed, use those tags in search
|
||||
searchParams.push(searchTags)
|
||||
noteSearchQuery += ' AND notes_tags.tag_id IN (?)'
|
||||
}
|
||||
|
||||
//Finish up note query
|
||||
noteSearchQuery += ' GROUP BY notes.id ORDER BY updated DESC, created DESC, id DESC'
|
||||
|
||||
db.promise()
|
||||
.query(`SELECT tags.id, tags.text, count(tags.id) as usages FROM notes_tags
|
||||
JOIN tags ON (tags.id = notes_tags.tag_id)
|
||||
WHERE notes_tags.user_id = ?
|
||||
AND note_id IN (?)
|
||||
GROUP BY tags.id
|
||||
ORDER BY usages DESC;`,[userId, noteIds])
|
||||
.then((tagRows, tagFields) => {
|
||||
.query(noteSearchQuery, searchParams)
|
||||
.then((noteRows, noteFields) => {
|
||||
|
||||
returnData['tags'] = tagRows[0]
|
||||
//Push all notes
|
||||
returnData['notes'] = noteRows[0]
|
||||
|
||||
//pull out all note ids so we can fetch all tags for those notes
|
||||
let noteIds = []
|
||||
returnData['notes'].forEach(note => {
|
||||
|
||||
//Grab note ID for finding tags
|
||||
noteIds.push(note.id)
|
||||
|
||||
//Attempt to pull string out of first tag in note
|
||||
let reg = note.text.match(/<([\w]+)[^>]*>(.*?)<\/\1>/)
|
||||
if(reg != null){
|
||||
note.text = reg[2]
|
||||
}
|
||||
|
||||
//Return all notes with HTML tags pulled out
|
||||
note.text = note.text
|
||||
.replace(/&[#A-Za-z0-9]+;/g,'') //Rip out all HTML entities
|
||||
.replace(/<[^>]+>/g, '') //Rip out all HTML tags
|
||||
|
||||
note.note_highlights = []
|
||||
note.attachment_highlights = []
|
||||
note.tag_highlights = []
|
||||
|
||||
//Push in solr highlights
|
||||
if(highlights && highlights[note.id] && highlights[note.id].note_text){
|
||||
note['note_highlights'] = highlights[note.id].note_text
|
||||
}
|
||||
if(highlights && highlights[note.id] && highlights[note.id].attachment_text){
|
||||
note['attachment_highlights'] = highlights[note.id].attachment_text
|
||||
}
|
||||
if(highlights && highlights[note.id] && highlights[note.id].notes_tags){
|
||||
note['tag_highlights'] = highlights[note.id].notes_tags
|
||||
}
|
||||
})
|
||||
|
||||
//If no notes are returned, there are no tags, return empty
|
||||
if(noteIds.length == 0){
|
||||
resolve(returnData)
|
||||
}
|
||||
|
||||
//Only show tags of selected notes
|
||||
db.promise()
|
||||
.query(`SELECT tags.id, tags.text, count(tags.id) as usages FROM notes_tags
|
||||
JOIN tags ON (tags.id = notes_tags.tag_id)
|
||||
WHERE notes_tags.user_id = ?
|
||||
AND note_id IN (?)
|
||||
GROUP BY tags.id
|
||||
ORDER BY usages DESC;`,[userId, noteIds])
|
||||
.then((tagRows, tagFields) => {
|
||||
|
||||
returnData['tags'] = tagRows[0]
|
||||
|
||||
resolve(returnData)
|
||||
})
|
||||
.catch(console.log)
|
||||
|
||||
resolve(returnData)
|
||||
})
|
||||
.catch(console.log)
|
||||
|
||||
})
|
||||
.catch(console.log)
|
||||
|
||||
})
|
||||
}
|
@@ -96,6 +96,22 @@ Tags.get = (userId, noteId) => {
|
||||
})
|
||||
}
|
||||
|
||||
//Build a comma delimited string of the tag texts of a note.
//
// userId - owner of the note
// noteId - note whose tags are listed
//
//Resolves with the `text` of every tag returned by Tags.get joined by ',' ('' when
//the note has no tags). A rejection of Tags.get is passed on to the caller instead
//of leaving the returned promise pending.
Tags.string = (userId, noteId) => {
    return Tags.get(userId, noteId).then(tagArray => {
        //Output comma delimited list of tag strings
        return (tagArray || []).map(tag => tag.text).join(',')
    })
}
|
||||
|
||||
Tags.lookup = (tagText) => {
|
||||
return new Promise((resolve, reject) => {
|
||||
db.promise()
|
||||
|
@@ -23,8 +23,8 @@ User.login = (username, password) => {
|
||||
//User not found, create a new account with set data
|
||||
if(rows[0].length == 0){
|
||||
User.create(lowerName, password)
|
||||
.then(result => {
|
||||
resolve(result)
|
||||
.then(loginToken => {
|
||||
resolve(loginToken)
|
||||
})
|
||||
return
|
||||
}
|
||||
@@ -112,20 +112,5 @@ User.create = (username, password) => {
|
||||
.catch(console.log)
|
||||
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
//Just used for testing
|
||||
User.getUsername = (userId) => {
|
||||
return new Promise((resolve, reject) => {
|
||||
|
||||
db.promise()
|
||||
.query('SELECT username FROM users WHERE id = ? LIMIT 1', [userId])
|
||||
.then((rows, fields) => {
|
||||
const data = rows[0][0]
|
||||
|
||||
resolve(data)
|
||||
})
|
||||
.catch(console.log)
|
||||
})
|
||||
}
|
Reference in New Issue
Block a user