SolidScribe/server/models/Attachment.js

let db = require('@config/database')
let SiteScrape = require('@helpers/SiteScrape')
const cs = require('@helpers/CryptoString')
let Attachment = module.exports = {}
const cheerio = require('cheerio')
const rp = require('request-promise')
const request = require('request')
const fs = require('fs')
const gm = require('gm')
const tesseract = require("node-tesseract-ocr")
const filePath = '../staticFiles/'
// Attachment.migrateOld
Attachment.textSearch = (userId, searchTerm) => {
return new Promise((resolve, reject) => {
const front = 5
const tail = 150
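//Snippet logic: start the excerpt a few characters before the first match (when the match sits deep enough in the text)
//and include enough trailing context to display. MATCH ... AGAINST below relies on a FULLTEXT index on attachment.text.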
const query = `
SELECT
*,
substring(
text,
IF(LOCATE(?, text) > ${tail}, LOCATE(?, text) - ${front}, 1),
${tail} + LENGTH(?) + ${front}
) as snippet
FROM attachment
WHERE user_id = ?
AND visible != 0
AND MATCH(text)
AGAINST(? IN NATURAL LANGUAGE MODE)
LIMIT 1000`
db.promise()
.query(query, [searchTerm, searchTerm, searchTerm, userId, searchTerm])
.then((rows, fields) => {
resolve(rows[0]) //Return all attachments found by query
})
.catch(console.log)
})
}
Attachment.search = (userId, noteId, attachmentType, offset, setSize, includeShared) => {
console.log([userId, noteId, attachmentType, offset, setSize, includeShared])
return new Promise((resolve, reject) => {
let params = [userId]
let query = `
SELECT attachment.*, note.share_user_id FROM attachment
LEFT JOIN note ON (attachment.note_id = note.id)
WHERE attachment.user_id = ? AND visible = 1
`
if(noteId && noteId > 0){
//
// Show everything if note ID is present
//
query += 'AND attachment.note_id = ? '
params.push(noteId)
} else {
//
// Other filters if NO note id
//
if(attachmentType == 'links'){
query += 'AND attachment_type = 1 '
}
if(attachmentType == 'files'){
query += 'AND attachment_type > 1 '
}
query += `AND note.archived = ${ attachmentType == 'archived' ? '1':'0' } `
query += `AND note.trashed = ${ attachmentType == 'trashed' ? '1':'0' } `
if(!attachmentType){
// Null note ID means it was pushed by bookmarklet
query += 'OR attachment.note_id IS NULL '
}
}
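//Outside of a single note, filter by sharing: only shared notes when includeShared is set, otherwise only unshared notes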
if(!noteId){
const sharedOrNot = includeShared ? ' NOT ':' '
query += `AND note.share_user_id IS${sharedOrNot}NULL `
}
query += 'ORDER BY last_indexed DESC '
const limitOffset = parseInt(offset, 10) || 0 //Either parse int, or use zero
const parsedSetSize = parseInt(setSize, 10) || 20
query += ` LIMIT ${limitOffset}, ${parsedSetSize}`
console.log(query)
db.promise()
.query(query, params)
.then((rows, fields) => {
resolve(rows[0]) //Return all attachments found by query
})
.catch(console.log)
})
}
Attachment.urlForNote = (userId, noteId) => {
return new Promise((resolve, reject) => {
db.promise()
.query(`SELECT * FROM attachment WHERE user_id = ? AND note_id = ? AND attachment_type = 1 ORDER BY last_indexed DESC;`, [userId, noteId])
.then((rows, fields) => {
resolve(rows[0]) //Return all attachments found by query
})
.catch(console.log)
})
}
//Update attachment in database
Attachment.update = (userId, attachmentId, updatedText, noteId) => {
return new Promise((resolve, reject) => {
db.promise()
.query(`UPDATE attachment SET text = ? WHERE id = ? AND user_id = ?`,
[updatedText, attachmentId, userId])
.then((rows, fields) => {
resolve(true)
})
.catch(console.log)
})
}
Attachment.delete = (userId, attachmentId, urlDelete = false) => {
let attachment = null
let noteExists = true
return new Promise((resolve, reject) => {
db.promise()
.query('SELECT * FROM attachment WHERE id = ? AND user_id = ? LIMIT 1', [attachmentId, userId])
.then((rows, fields) => {
//Attachment doesn't exist, return done
if(rows[0].length == 0){
return resolve(true)
}
attachment = rows[0][0]
return db.promise().query('SELECT count(id) as `exists` FROM note WHERE id = ?', [attachment.note_id])
})
.then((rows, fields) => {
noteExists = (rows[0][0]['exists'] > 0)
let url = attachment.url
const noteId = attachment.note_id
//Try to delete file and thumbnail
try {
fs.unlinkSync(filePath+attachment.file_location)
} catch(err) { console.error('File does not exist') }
try {
fs.unlinkSync(filePath+'thumb_'+attachment.file_location)
} catch(err) { console.error('Thumbnail does not exist') }
//Do not delete link attachments, just hide them. They will be deleted if removed from note or if note is deleted
if(attachment.attachment_type == 1 && !urlDelete && noteExists){
db.promise()
.query(`UPDATE attachment SET visible = 0 WHERE id = ?`, [attachmentId])
.then((rows, fields) => resolve(true))
.catch(console.log)
} else {
db.promise()
.query(`DELETE FROM attachment WHERE id = ?`, [attachmentId])
.then((rows, fields) => resolve(true))
.catch(console.log)
}
})
.catch(console.log)
})
}
Attachment.processUploadedFile = (userId, noteId, fileObject) => {
return new Promise((resolve, reject) => {
const rawFilename = fileObject.filename
const extension = '.'+fileObject.originalname.split('.').pop()
const goodFileName = rawFilename+extension
const fileName = fileObject.originalname //Actual name of the file, dog.jpg
//Rename random file name to one with an extension
fs.rename(filePath+rawFilename, filePath+goodFileName, (err) => {
const created = Math.round((+new Date)/1000)
db.promise()
.query(`
INSERT INTO attachment
(note_id, user_id, attachment_type, \`text\`, last_indexed, file_location)
VALUES
(?, ?, ?, ?, ?, ?)
`, [noteId, userId, 2, 'Add a description to -> '+fileName, created, goodFileName])
.then((rows, fields) => {
Attachment.generateThumbnail(goodFileName)
//Attempt OCR on the uploaded file so any text in it becomes searchable
if(true){
// https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality
//psm 3 - default, 11 - as much text as possible
const config = { lang: "eng", oem: 1, psm: 3 }
tesseract.recognize(filePath+goodFileName, config)
.then(text => {
text = text.slice(0, -1).trim()
if(text.length > 5){
console.log('Inserting text')
db.promise().query(
`UPDATE attachment SET text = ? WHERE id = ? AND user_id = ? LIMIT 1`,
[text, rows[0].insertId, userId]
).then(results => {
resolve({ fileName, goodFileName })
})
} else {
return resolve({ fileName, goodFileName })
}
})
.catch(error => {
console.log(error.message)
resolve({ fileName, goodFileName }) //Resolve anyway so the upload completes even if OCR fails
})
} else {
resolve({ fileName, goodFileName })
}
})
.catch(console.log)
})
})
}
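//Write a resized, compressed copy of an image into the same directory, prefixed with thumb_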
Attachment.generateThumbnail = (fileName) => {
return new Promise((resolve, reject) => {
gm(filePath+fileName)
.resize(550) //Resize to width of 550 px
.quality(75) //compression level 0 - 100 (best)
.write(filePath + 'thumb_'+fileName, function (err) {
resolve(fileName)
})
})
}
//Scans note text for websites, returns the combined searchable text from all scraped URLs
Attachment.scanTextForWebsites = (io, userId, noteId, noteText) => {
return new Promise((resolve, reject) => {
let solrAttachmentText = '' //Final searchable scrape text for note
if(noteText.length == 0){ return resolve(solrAttachmentText) }
Attachment.urlForNote(userId, noteId).then(attachments => {
//Pull all the URLs out of the text
let foundUrls = SiteScrape.getCleanUrls(noteText)
//Go through each saved URL, remove new URLs from saved URLs
//If a URL is not found, delete it
attachments.forEach(attachment => {
//URL already scraped, push text and continue
let urlIndex = foundUrls.indexOf( attachment.url )
if(urlIndex != -1){
solrAttachmentText += attachment.text
foundUrls.splice(urlIndex, 1) //Remove existing from set of found
} else {
//If existing attachment is not found in note, remove it
Attachment.delete(userId, attachment.id, true)
}
})
//No newly scraped URLs, resolve with looked up attachment text
if(foundUrls == null || foundUrls.length == 0){
return resolve(solrAttachmentText)
}
//Process the remaining URLs into attachments
Attachment.scrapeUrlsCreateAttachments(userId, noteId, foundUrls).then( freshlyScrapedText => {
//Once everything is done being scraped, emit new attachment events
SocketIo.to(userId).emit('update_counts')
// Tell user to update attachments with scraped text
SocketIo.to(userId).emit('update_note_attachments')
solrAttachmentText += freshlyScrapedText
resolve(solrAttachmentText)
})
.catch(console.log)
})
})
}
//Return scraped text from each URL
Attachment.scrapeUrlsCreateAttachments = (userId, noteId, foundUrls) => {
return new Promise((resolve, reject) => {
if(foundUrls == null || foundUrls.length == 0){
return resolve('')
}
console.log('About to scrape')
console.log(foundUrls)
let processedCount = 0
let scrapedText = ''
//Process each URL passed to this function, a DB entry will be created for each scrape
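//Completion is tracked with a counter; the promise resolves once every URL has reported back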
foundUrls.forEach(url => {
Attachment.processUrl(userId, noteId, url).then( freshlyScrapedText => {
scrapedText += freshlyScrapedText
processedCount ++
//All URLs have been scraped, return data
if(processedCount == foundUrls.length){
console.log('All urls scraped')
return resolve(scrapedText)
}
})
.catch(error => {
console.log('Site Scrape error', error)
})
})
})
}
Attachment.downloadFileFromUrl = (url) => {
return new Promise((resolve, reject) => {
if(!url){
return resolve(null)
}
const random = Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15)
let extension = ''
let fileName = random+'_scrape'
let thumbPath = 'thumb_'+fileName
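//The image is written straight to the thumb_ path; the bare fileName is what gets stored on the attachment row,
//and the delete routine re-adds the thumb_ prefix when cleaning up the file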
console.log('Scraping image url', url)
console.log('Getting ready to scrape ', url)
request(url)
.on('error', error => {
console.log(error)
resolve(null)
})
.on('response', res => {
console.log(res.statusCode)
console.log(res.headers['content-type'])
//Get mime type from header content type
// extension = '.'+String(res.headers['content-type']).split('/').pop()
})
.pipe(fs.createWriteStream(filePath+thumbPath))
.on('close', () => {
//Resize the image if it's really big
gm(filePath+thumbPath)
.resize(550) //Resize to width of 550 px
.quality(85) //compression level 0 - 100 (best)
.write(filePath+thumbPath, function (err) {
if(err){
console.log(err)
return resolve(null)
}
console.log('Saved Image')
return resolve(fileName)
})
})
})
}
Attachment.processUrl = (userId, noteId, url) => {
const scrapeTime = 5*1000;
return new Promise((resolve, reject) => {
const options = {
uri: url,
simple: true,
timeout: scrapeTime,
headers: {
'User-Agent':'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' //Simulate google headers
},
transform: function (body) {
return cheerio.load(body);
}
}
let requestTimeout = null
let thumbnail = null
let request = null
let created = Math.round((+new Date)/1000)
let insertedId = null
//Create a shell attachment for each URL, put in processing state
db.promise()
.query(`INSERT INTO attachment
(note_id, user_id, attachment_type, text, url, last_indexed, file_location)
VALUES (?, ?, ?, ?, ?, ?, ?)`,
[noteId, userId, 1, url, url, created, null])
.then((rows, fields) => {
//Set two outer-scope variables, then return the request for processing
request = rp(options)
insertedId = rows[0].insertId
return request
})
.then($ => {
//Clear timeout that would end this function
clearTimeout(requestTimeout)
// let header = $('h1').text().replace(removeWhitespace, " ")
// desiredSearchText += header + "\n"
const pageTitle = SiteScrape.getTitle($)
const hostname = SiteScrape.getHostName(url)
const thumbnail = SiteScrape.getDisplayImage($, url)
const keywords = SiteScrape.getKeywords($)
var desiredSearchText = ''
desiredSearchText += pageTitle
if(keywords){
desiredSearchText += "\n " + keywords
}
console.log('Results from site scrape-------------')
console.log({
pageTitle,
hostname,
thumbnail,
keywords
})
// throw new Error('Ending this function early.')
// console.log('TexT Scraped')
// console.log(desiredSearchText)
created = Math.round((+new Date)/1000)
//Scrape URL for thumbnail - take filename and save in attachment
Attachment.downloadFileFromUrl(thumbnail)
.then(thumbnailFilename => {
//Update text and thumbnail filename
created = Math.round((+new Date)/1000)
db.promise()
.query(`UPDATE attachment SET
text = ?,
last_indexed = ?,
file_location = ?
WHERE id = ?
`, [desiredSearchText, created, thumbnailFilename, insertedId])
.then((rows, fields) => {
resolve(desiredSearchText) //Return found text
})
.catch(console.log)
//Create attachment in DB with scrape text and provided data
// db.promise()
// .query(`INSERT INTO attachment
// (note_id, user_id, attachment_type, text, url, last_indexed, file_location)
// VALUES (?, ?, ?, ?, ?, ?, ?)`, [noteId, userId, 1, desiredSearchText, url, created, thumbnailFilename])
// .then((rows, fields) => {
// resolve(desiredSearchText) //Return found text
// })
// .catch(console.log)
})
})
.catch(error => {
console.log('Scrape pooped out')
console.log('Issue with scrape', error.statusCode)
clearTimeout(requestTimeout)
return resolve('No site text')
})
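//Safety net: if the scrape takes longer than scrapeTime, cancel the request and resolve with a timeout message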
requestTimeout = setTimeout( () => {
console.log('Cancel the request, it is taking too long.')
request.cancel()
return resolve('Request Timeout')
}, scrapeTime )
})
}
Attachment.generatePushKey = (userId) => {
return new Promise((resolve, reject) => {
db.promise()
.query("SELECT pushkey FROM user WHERE id = ? LIMIT 1", [userId])
.then((rows, fields) => {
const pushKey = rows[0][0].pushkey
// push key exists
if(pushKey && pushKey.length > 0){
return resolve(pushKey)
} else {
// generate and save a new key
const newPushKey = cs.createSmallSalt()
db.promise()
.query('UPDATE user SET pushkey = ? WHERE id = ? LIMIT 1', [newPushKey,userId])
.then((rows, fields) => {
return resolve(newPushKey)
})
}
})
})
}
Attachment.deletePushKey = (userId) => {
return new Promise((resolve, reject) => {
db.promise()
.query('UPDATE user SET pushkey = null WHERE id = ? LIMIT 1', [userId])
.then((rows, fields) => {
return resolve(rows[0].affectedRows == 1)
})
})
}
Attachment.getPushkeyBookmarklet = (userId) => {
return new Promise((resolve, reject) => {
Attachment.generatePushKey(userId)
.then( pushKey => {
let bookmarklet = Attachment.generateBookmarkletText(pushKey)
return resolve(bookmarklet)
})
})
}
Attachment.pushUrl = (pushkey,url) => {
return new Promise((resolve, reject) => {
let userId = null
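//'+' characters in the pushkey decode to spaces when passed through a query string, so restore them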
pushkey = pushkey.replace(/ /g, '+')
db.promise()
.query("SELECT id FROM user WHERE pushkey = ? LIMIT 1", [pushkey])
.then((rows, fields) => {
if(rows[0].length == 0){
return resolve(true)
}
userId = rows[0][0].id
return Attachment.scrapeUrlsCreateAttachments(userId, null, [url])
})
.then(() => {
if(typeof SocketIo != 'undefined'){
//Once everything is done being scraped, emit new attachment events
SocketIo.to(userId).emit('update_counts')
// Tell user to update attachments with scraped text
SocketIo.to(userId).emit('update_note_attachments')
}
return resolve(true)
})
.catch(console.log)
})
}
Attachment.generateBookmarkletText = (pushKey) => {
const endpoint = '/api/public/pushmebaby'
let url = 'https://www.solidscribe.com' + endpoint
if(process.env.NODE_ENV === 'development'){
// url = 'https://192.168.1.164' + endpoint
}
// Terminate each line with a semi-colon, super important, since spaces are removed.
// document.getElementById(id).remove();
url += '?pushkey='+encodeURIComponent(pushKey)
const bookmarkletV3 = `
javascript: (() => {
var p = encodeURIComponent(window.location.href);
var n = "`+url+`&url="+p;
window.open(n, '_blank', 'noopener=noopener');
window.focus();
var k = document.createElement("div");
k.setAttribute("style", "position:fixed;right:10px;top:10px;z-index:222222;border-radius:4px;font-size:1.3em;padding:20px 15px;background: #8f51be;color:white;");
k.innerHTML = "Posted URL to your Solid Scribe account";
document.body.appendChild(k);
setTimeout(()=>{
k.remove();
},5000);
})();
`
return bookmarkletV3
.replace(/\t|\r|\n/gm, "") // Remove tabs, new lines, returns
.replace(/\s+/g, ' ') // remove double spaces
.trim()
}