From f55a996360bab563fb6cba2d5cdca5e5fe77f82c Mon Sep 17 00:00:00 2001 From: jzitnik-dev Date: Tue, 2 Sep 2025 07:58:32 +0200 Subject: [PATCH] fix: Errors --- .gitignore | 1 + scrape/scraper.js | 272 +++++++++++++++++++++++++--------------------- 2 files changed, 152 insertions(+), 121 deletions(-) diff --git a/.gitignore b/.gitignore index a3f389c..462657e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ node_modules volume/browser db downloads +errors diff --git a/scrape/scraper.js b/scrape/scraper.js index 2259568..60a835c 100644 --- a/scrape/scraper.js +++ b/scrape/scraper.js @@ -5,7 +5,6 @@ import parseThisShit from './parse.js'; const EMAIL = process.env.EMAIL; const PASSWORD = process.env.PASSWORD; -//const SHAREPOINT_URL = 'https://onedrive.live.com/personal/7d8c4d9baeeebde3/_layouts/15/doc2.aspx?resid=2bddf9b7-8613-4ae3-a684-0be6d73d90bf&cid=7d8c4d9baeeebde3&ct=1748937302474&wdOrigin=OFFICECOM-WEB.START.UPLOAD&wdPreviousSessionSrc=HarmonyWeb&wdPreviousSession=ce7df0ab-aade-4df2-9e2e-492e99049666'; const SHAREPOINT_URL = 'https://spsejecnacz.sharepoint.com/:x:/s/nastenka/ESy19K245Y9BouR5ksciMvgBu3Pn_9EaT0fpP8R6MrkEmg'; const VOLUME_PATH = path.resolve('./volume/browser'); @@ -18,135 +17,166 @@ async function clearDownloadsFolder() { } } -(async () => { - const browser = await puppeteer.launch({ - headless: 'new', - userDataDir: VOLUME_PATH, - args: ['--no-sandbox', '--disable-setuid-sandbox'] - }); - const [page] = await browser.pages(); - - const downloadPath = path.resolve('./downloads'); - if (!fs.existsSync(downloadPath)) fs.mkdirSync(downloadPath); - - const client = await page.target().createCDPSession(); - await client.send('Page.setDownloadBehavior', { - behavior: 'allow', - downloadPath: downloadPath, - }); - - await page.goto(SHAREPOINT_URL, { waitUntil: 'networkidle2' }); - - await new Promise(r => setTimeout(r, 3000)); - - if (page.url().includes('login.')) { - console.log('Logging in...'); - - try { - await page.waitForSelector('input[type="email"]', { timeout: 3000 }); - await page.type('input[type="email"]', EMAIL, { delay: 50 }); - await page.keyboard.press('Enter'); - } catch { - await page.waitForSelector(".table"); - await page.click('.table'); - } - await new Promise(r => setTimeout(r, 2000)); - - try { - await page.waitForSelector('div[role="button"]', { timeout: 5000 }); - const signInButtons = await page.$$('div[role="button"]'); - for (const btn of signInButtons) { - const text = await page.evaluate(el => el.innerText, btn); - if (text && text.includes('with password')) { - console.log('Clicking "Sign in with password"...'); - await btn.click(); - break; - } - } - } catch (err) { - console.log('"Sign in with password" button not found, continuing...'); - } - - await page.waitForSelector('input[type="password"]', { timeout: 100000 }); - await page.type('input[type="password"]', PASSWORD, { delay: 50 }); - await page.keyboard.press('Enter'); - - try { - await page.waitForSelector('input[type="submit"]', { timeout: 10000 }); - await page.click('input[type="submit"]'); - } catch { - console.log('No stay signed in prompt.'); - } - - // wait for navigation after login - await page.waitForNavigation({ waitUntil: 'networkidle2' }); - } - - // Wait for iframe containing file options - const frameHandle = await page.waitForSelector('iframe'); - const frame = await frameHandle.contentFrame(); - - await frame.waitForSelector('button[title="File"]', { timeout: 60000 }); - await frame.click('button[title="File"]'); - - await new Promise(r => setTimeout(r, 500)); - - // Click "Create a Copy" +async function handleError(page, err) { try { - await frame.waitForSelector('div[role="menuitem"][name="Create a Copy"]', { visible: true }); - } catch { - await frame.click('button[title="File"]'); - await new Promise(r => setTimeout(r, 500)); - } - await frame.click('div[role="menuitem"][name="Create a Copy"]'); + const errorsDir = path.resolve('./errors'); + if (!fs.existsSync(errorsDir)) fs.mkdirSync(errorsDir); - await new Promise(r => setTimeout(r, 500)); + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + const filePath = path.join(errorsDir, `error-${timestamp}.png`); - // Click "Download a Copy" - await frame.waitForSelector('div[role="menuitem"][name="Download a Copy"]', { visible: true }); - await frame.click('div[role="menuitem"][name="Download a Copy"]'); + await page.screenshot({ path: filePath, fullPage: true }); + console.error(`❌ Error occurred. Screenshot saved: ${filePath}`); - // Wait some seconds for download to start - await new Promise(r => setTimeout(r, 10000)); - - // Helper: wait for file to appear in download folder - function waitForFile(filename, timeout = 30000) { - return new Promise((resolve, reject) => { - const start = Date.now(); - const interval = setInterval(() => { - if (fs.existsSync(filename)) { - clearInterval(interval); - resolve(); - } else if (Date.now() - start > timeout) { - clearInterval(interval); - reject(new Error('Timeout waiting for file')); - } - }, 500); - }); - } - - // Helper: get newest .xlsx file in downloads folder - function getNewestFile(dir) { - const files = fs.readdirSync(dir) + // Keep only last 10 screenshots + const files = fs.readdirSync(errorsDir) .map(f => ({ name: f, - time: fs.statSync(path.join(dir, f)).mtime.getTime() + time: fs.statSync(path.join(errorsDir, f)).mtime.getTime() })) .sort((a, b) => b.time - a.time); - return files.length ? path.join(dir, files[0].name) : null; + + if (files.length > 10) { + const oldFiles = files.slice(10); + for (const f of oldFiles) { + fs.unlinkSync(path.join(errorsDir, f.name)); + } + } + } catch (screenshotErr) { + console.error('Failed to take screenshot:', screenshotErr); } + console.error(err); +} - // Wait for the downloaded file - const downloadedFilePath = getNewestFile(downloadPath); - if (!downloadedFilePath) { - throw new Error('No XLSX file found in download folder'); +(async () => { + let browser, page; + try { + browser = await puppeteer.launch({ + headless: 'new', + userDataDir: VOLUME_PATH, + args: ['--no-sandbox', '--disable-setuid-sandbox'] + }); + [page] = await browser.pages(); + + const downloadPath = path.resolve('./downloads'); + if (!fs.existsSync(downloadPath)) fs.mkdirSync(downloadPath); + + const client = await page.target().createCDPSession(); + await client.send('Page.setDownloadBehavior', { + behavior: 'allow', + downloadPath: downloadPath, + }); + + await page.goto(SHAREPOINT_URL, { waitUntil: 'networkidle2' }); + + await new Promise(r => setTimeout(r, 3000)); + + if (page.url().includes('login.')) { + console.log('Logging in...'); + + try { + await page.waitForSelector('input[type="email"]', { timeout: 3000 }); + await page.type('input[type="email"]', EMAIL, { delay: 50 }); + await page.keyboard.press('Enter'); + } catch { + await page.waitForSelector(".table"); + await page.click('.table'); + } + await new Promise(r => setTimeout(r, 2000)); + + try { + await page.waitForSelector('div[role="button"]', { timeout: 5000 }); + const signInButtons = await page.$$('div[role="button"]'); + for (const btn of signInButtons) { + const text = await page.evaluate(el => el.innerText, btn); + if (text && text.includes('with password')) { + console.log('Clicking "Sign in with password"...'); + await btn.click(); + break; + } + } + } catch { + console.log('"Sign in with password" button not found, continuing...'); + } + + await page.waitForSelector('input[type="password"]', { timeout: 100000 }); + await page.type('input[type="password"]', PASSWORD, { delay: 50 }); + await page.keyboard.press('Enter'); + + try { + await page.waitForSelector('input[type="submit"]', { timeout: 10000 }); + await page.click('input[type="submit"]'); + } catch { + console.log('No stay signed in prompt.'); + } + + await page.waitForNavigation({ waitUntil: 'networkidle2' }); + } + + const frameHandle = await page.waitForSelector('iframe'); + const frame = await frameHandle.contentFrame(); + + await frame.waitForSelector('button[title="File"]', { timeout: 60000 }); + await frame.click('button[title="File"]'); + await new Promise(r => setTimeout(r, 500)); + + try { + await frame.waitForSelector('div[role="menuitem"][name="Create a Copy"]', { visible: true }); + } catch { + await frame.click('button[title="File"]'); + await new Promise(r => setTimeout(r, 500)); + } + await frame.click('div[role="menuitem"][name="Create a Copy"]'); + await new Promise(r => setTimeout(r, 500)); + + await frame.waitForSelector('div[role="menuitem"][name="Download a Copy"]', { visible: true }); + await frame.click('div[role="menuitem"][name="Download a Copy"]'); + + await new Promise(r => setTimeout(r, 10000)); + + function waitForFile(filename, timeout = 30000) { + return new Promise((resolve, reject) => { + const start = Date.now(); + const interval = setInterval(() => { + if (fs.existsSync(filename)) { + clearInterval(interval); + resolve(); + } else if (Date.now() - start > timeout) { + clearInterval(interval); + reject(new Error('Timeout waiting for file')); + } + }, 500); + }); + } + + function getNewestFile(dir) { + const files = fs.readdirSync(dir) + .map(f => ({ + name: f, + time: fs.statSync(path.join(dir, f)).mtime.getTime() + })) + .sort((a, b) => b.time - a.time); + return files.length ? path.join(dir, files[0].name) : null; + } + + const downloadedFilePath = getNewestFile(downloadPath); + if (!downloadedFilePath) { + throw new Error('No XLSX file found in download folder'); + } + console.log('Waiting for file:', downloadedFilePath); + await waitForFile(downloadedFilePath); + + await parseThisShit(downloadedFilePath); + await clearDownloadsFolder(); + + await browser.close(); + } catch (err) { + if (page) { + await handleError(page, err); + } else { + console.error('Fatal error before page init:', err); + } + if (browser) await browser.close(); + process.exit(1); } - console.log('Waiting for file:', downloadedFilePath); - await waitForFile(downloadedFilePath); - - await parseThisShit(downloadedFilePath); - - await clearDownloadsFolder(); - - await browser.close(); })();