Added a build script that will push newly built code to avid habit
Added a request timeout to prevent long requests from holding up note saving. Added a header to the request to simulate the Google crawler.
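The build script itself is not part of this diff. A minimal sketch of what such a push step could look like, assuming a Node.js script that shells out to rsync (the host, paths, and npm script names here are hypothetical, not taken from the repository):

// deploy.js - hypothetical sketch of the build-and-push step; the real
// script is not shown in this diff. Host, paths, and commands are assumptions.
const { execSync } = require('child_process')

const run = (cmd) => {
    console.log(`> ${cmd}`)
    execSync(cmd, { stdio: 'inherit' })
}

// Build the project, then push the freshly built output to the server.
run('npm run build')
run('rsync -az --delete ./dist/ deploy@avid-habit:/var/www/app/')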
@@ -112,15 +112,26 @@ Attachment.processUrl = (userId, noteId, url) => {
 var removeWhitespace = /\s+/g

 // console.log('Scraping ', website)
 const options = {
     uri: url,
     simple: true,
+    timeout: 1000 * 10, // 10 seconds
+    headers: {
+        'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' // Simulate Google crawler headers
+    },
     transform: function (body) {
         return cheerio.load(body);
     }
 }

-rp(options).then($ => {
+let requestTimeout = null
+
+let request = rp(options)
+    .then($ => {
+
+        clearTimeout(requestTimeout)

         var desiredSearchText = ''

@@ -191,5 +202,29 @@ Attachment.processUrl = (userId, noteId, url) => {
             .catch(console.log)

     })
+    .catch(error => {
+        console.log('Issue with scrape')
+        console.log(error)
+        resolve('')
+    })
+
+    requestTimeout = setTimeout(() => {
+        console.log('Cancel the request, it\'s taking too long.')
+        request.cancel()
+
+        desiredSearchText = 'Unable to Scrape URL at this time'
+        const created = Math.round((+new Date) / 1000)
+
+        // Create attachment in DB with scrape text and provided data
+        db.promise()
+            .query(`INSERT INTO attachment
+            (note_id, user_id, attachment_type, text, url, last_indexed)
+            VALUES (?, ?, ?, ?, ?, ?)`, [noteId, userId, 1, desiredSearchText, url, created])
+            .then((rows, fields) => {
+                resolve(desiredSearchText) // Return the found text
+            })
+            .catch(console.log)
+
+    }, 5000)
 })
 }
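For context on the change above: the options object sets the request library's own timeout to 10 seconds, while the setTimeout adds a stricter 5 second budget that cancels the request outright. A cancelled request-promise promise is neither resolved nor rejected (Bluebird cancellation semantics), which is why the timeout handler writes the fallback attachment row and resolves on its own. A stripped-down, self-contained sketch of the same pattern (the scrape helper name is illustrative, not from the codebase):

const rp = require('request-promise')

// Minimal sketch of the cancel-after-timeout pattern from the diff above.
const scrape = (url) => new Promise((resolve) => {
    let timer = null

    const request = rp({ uri: url, timeout: 1000 * 10 })
        .then((body) => {
            clearTimeout(timer) // response arrived in time, disarm the fallback
            resolve(body)
        })
        .catch((error) => {
            clearTimeout(timer)
            console.log('Issue with scrape', error.message)
            resolve('') // swallow errors so note saving is not blocked
        })

    // Fallback: cancel the request after 5 seconds. Since cancellation
    // skips both .then and .catch, we must resolve here ourselves.
    timer = setTimeout(() => {
        request.cancel()
        resolve('Unable to Scrape URL at this time')
    }, 5000)
})

// Usage: scrape('https://example.com').then((text) => console.log(text.length))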