From 8e303a971147d764ea25a7b036dea2b3f9365c14 Mon Sep 17 00:00:00 2001 From: jzitnik-dev Date: Thu, 7 Aug 2025 00:38:11 +0200 Subject: [PATCH] chore: Save screenshot --- scrape/scraper.js | 221 +++++++++++++++++++++++----------------------- 1 file changed, 112 insertions(+), 109 deletions(-) diff --git a/scrape/scraper.js b/scrape/scraper.js index f5b2fd2..5b9b471 100644 --- a/scrape/scraper.js +++ b/scrape/scraper.js @@ -5,7 +5,7 @@ const parseThisShit = require('./parse'); const EMAIL = process.env.EMAIL; const PASSWORD = process.env.PASSWORD; -//const SHAREPOINT_URL = 'https://onedrive.live.com/personal/7d8c4d9baeeebde3/_layouts/15/doc2.aspx?resid=2bddf9b7-8613-4ae3-a684-0be6d73d90bf&cid=7d8c4d9baeeebde3&ct=1748937302474&wdOrigin=OFFICECOM-WEB.START.UPLOAD&wdPreviousSessionSrc=HarmonyWeb&wdPreviousSession=ce7df0ab-aade-4df2-9e2e-492e99049666'; + const SHAREPOINT_URL = 'https://spsejecnacz.sharepoint.com/:x:/s/nastenka/EbA_RcWKRdRNlB8YU1iuWM4BnMetCQlVm8toHuuyW-TPyA?e=uu3iPR&CID=2686cea0-2d06-3304-4519-087fb9e06fd0'; const VOLUME_PATH = path.resolve('./volume/browser'); @@ -14,17 +14,43 @@ async function clearDownloadsFolder() { await fs.promises.rm('./downloads', { recursive: true, force: true }); await fs.promises.mkdir('./downloads'); } catch (err) { - console.error('Error:', err); + console.error('Error clearing downloads folder:', err); } } +function waitForFile(filename, timeout = 30000) { + return new Promise((resolve, reject) => { + const start = Date.now(); + const interval = setInterval(() => { + if (fs.existsSync(filename)) { + clearInterval(interval); + resolve(); + } else if (Date.now() - start > timeout) { + clearInterval(interval); + reject(new Error('Timeout waiting for file')); + } + }, 500); + }); +} + +function getNewestFile(dir, ext = '.xlsx') { + const files = fs.readdirSync(dir) + .filter(f => f.endsWith(ext)) + .map(f => ({ + name: f, + time: fs.statSync(path.join(dir, f)).mtime.getTime() + })) + .sort((a, b) => b.time - a.time); + return files.length ? path.join(dir, files[0].name) : null; +} + (async () => { const browser = await puppeteer.launch({ headless: 'new', - //headless: false, userDataDir: VOLUME_PATH, args: ['--no-sandbox', '--disable-setuid-sandbox'] }); + const [page] = await browser.pages(); const downloadPath = path.resolve('./downloads'); @@ -36,114 +62,91 @@ async function clearDownloadsFolder() { downloadPath: downloadPath, }); - await page.goto(SHAREPOINT_URL, { waitUntil: 'networkidle2' }); - - await new Promise(r => setTimeout(r, 3000)); - - if (page.url().includes('login.')) { - console.log('Logging in...'); - - await page.waitForSelector('input[type="email"]', { timeout: 10000 }); - await page.type('input[type="email"]', EMAIL, { delay: 50 }); - await page.keyboard.press('Enter'); - await new Promise(r => setTimeout(r, 2000)); - - try { - await page.waitForSelector('div[role="button"]', { timeout: 5000 }); - const signInButtons = await page.$$('div[role="button"]'); - for (const btn of signInButtons) { - const text = await page.evaluate(el => el.innerText, btn); - if (text && text.includes('with password')) { - console.log('Clicking "Sign in with password"...'); - await btn.click(); - break; - } - } - } catch (err) { - console.log('"Sign in with password" button not found, continuing...'); - } - - await page.waitForSelector('input[type="password"]', { timeout: 100000 }); - await page.type('input[type="password"]', PASSWORD, { delay: 50 }); - await page.keyboard.press('Enter'); - - try { - await page.waitForSelector('input[type="submit"]', { timeout: 10000 }); - await page.click('input[type="submit"]'); - } catch { - console.log('No stay signed in prompt.'); - } - - // wait for navigation after login - await page.waitForNavigation({ waitUntil: 'networkidle2' }); - } - - // Wait for iframe containing file options - const frameHandle = await page.waitForSelector('iframe'); - const frame = await frameHandle.contentFrame(); - - await frame.waitForSelector('button[title="File"]', { timeout: 60000 }); - await frame.click('button[title="File"]'); - - await new Promise(r => setTimeout(r, 500)); - - // Click "Create a Copy" try { - await frame.waitForSelector('div[role="menuitem"][name="Create a Copy"]', { visible: true }); - } catch { + await page.goto(SHAREPOINT_URL, { waitUntil: 'networkidle2', timeout: 60000 }); + + await new Promise(r => setTimeout(r, 3000)); + + if (page.url().includes('login.')) { + console.log('Logging in...'); + + await page.waitForSelector('input[type="email"]', { timeout: 10000 }); + await page.type('input[type="email"]', EMAIL, { delay: 50 }); + await page.keyboard.press('Enter'); + await new Promise(r => setTimeout(r, 2000)); + + try { + await page.waitForSelector('div[role="button"]', { timeout: 5000 }); + const signInButtons = await page.$$('div[role="button"]'); + for (const btn of signInButtons) { + const text = await page.evaluate(el => el.innerText, btn); + if (text && text.includes('with password')) { + console.log('Clicking "Sign in with password"...'); + await btn.click(); + break; + } + } + } catch (err) { + console.log('"Sign in with password" button not found, continuing...'); + } + + await page.waitForSelector('input[type="password"]', { timeout: 100000 }); + await page.type('input[type="password"]', PASSWORD, { delay: 50 }); + await page.keyboard.press('Enter'); + + try { + await page.waitForSelector('input[type="submit"]', { timeout: 10000 }); + await page.click('input[type="submit"]'); + } catch { + console.log('No stay signed in prompt.'); + } + + await page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }); + } + + const frameHandle = await page.waitForSelector('iframe', { timeout: 60000 }); + const frame = await frameHandle.contentFrame(); + + await frame.waitForSelector('button[title="File"]', { timeout: 60000 }); await frame.click('button[title="File"]'); await new Promise(r => setTimeout(r, 500)); + + try { + await frame.waitForSelector('div[role="menuitem"][name="Create a Copy"]', { visible: true }); + } catch { + await frame.click('button[title="File"]'); + await new Promise(r => setTimeout(r, 500)); + } + + await frame.click('div[role="menuitem"][name="Create a Copy"]'); + await new Promise(r => setTimeout(r, 500)); + + await frame.waitForSelector('div[role="menuitem"][name="Download a Copy"]', { visible: true }); + await frame.click('div[role="menuitem"][name="Download a Copy"]'); + + await new Promise(r => setTimeout(r, 10000)); + + const downloadedFilePath = getNewestFile(downloadPath, '.xlsx'); + if (!downloadedFilePath) { + throw new Error('No XLSX file found in download folder'); + } + + console.log('Waiting for file:', downloadedFilePath); + await waitForFile(downloadedFilePath); + + parseThisShit(downloadedFilePath); + + await clearDownloadsFolder(); + } catch (err) { + console.error('❌ Error occurred:', err.message); + const screenshotPath = `error-${Date.now()}.png`; + try { + await page.screenshot({ path: screenshotPath, fullPage: true }); + console.log(`📷 Screenshot saved to: ${screenshotPath}`); + } catch (screenshotErr) { + console.error('⚠️ Failed to capture screenshot:', screenshotErr.message); + } + } finally { + await browser.close(); } - await frame.click('div[role="menuitem"][name="Create a Copy"]'); - - await new Promise(r => setTimeout(r, 500)); - - // Click "Download a Copy" - await frame.waitForSelector('div[role="menuitem"][name="Download a Copy"]', { visible: true }); - await frame.click('div[role="menuitem"][name="Download a Copy"]'); - - // Wait some seconds for download to start - await new Promise(r => setTimeout(r, 10000)); - - // Helper: wait for file to appear in download folder - function waitForFile(filename, timeout = 30000) { - return new Promise((resolve, reject) => { - const start = Date.now(); - const interval = setInterval(() => { - if (fs.existsSync(filename)) { - clearInterval(interval); - resolve(); - } else if (Date.now() - start > timeout) { - clearInterval(interval); - reject(new Error('Timeout waiting for file')); - } - }, 500); - }); - } - - // Helper: get newest .xlsx file in downloads folder - function getNewestFile(dir, ext = '.xlsx') { - const files = fs.readdirSync(dir) - .filter(f => f.endsWith(ext)) - .map(f => ({ - name: f, - time: fs.statSync(path.join(dir, f)).mtime.getTime() - })) - .sort((a, b) => b.time - a.time); - return files.length ? path.join(dir, files[0].name) : null; - } - - // Wait for the downloaded file - const downloadedFilePath = getNewestFile(downloadPath, '.xlsx'); - if (!downloadedFilePath) { - throw new Error('No XLSX file found in download folder'); - } - console.log('Waiting for file:', downloadedFilePath); - await waitForFile(downloadedFilePath); - - parseThisShit(downloadedFilePath); - - await clearDownloadsFolder(); - - await browser.close(); })();