diff --git a/scrape/scraper.js b/scrape/scraper.js index 5b9b471..0577cb3 100644 --- a/scrape/scraper.js +++ b/scrape/scraper.js @@ -5,7 +5,7 @@ const parseThisShit = require('./parse'); const EMAIL = process.env.EMAIL; const PASSWORD = process.env.PASSWORD; - +//const SHAREPOINT_URL = 'https://onedrive.live.com/personal/7d8c4d9baeeebde3/_layouts/15/doc2.aspx?resid=2bddf9b7-8613-4ae3-a684-0be6d73d90bf&cid=7d8c4d9baeeebde3&ct=1748937302474&wdOrigin=OFFICECOM-WEB.START.UPLOAD&wdPreviousSessionSrc=HarmonyWeb&wdPreviousSession=ce7df0ab-aade-4df2-9e2e-492e99049666'; const SHAREPOINT_URL = 'https://spsejecnacz.sharepoint.com/:x:/s/nastenka/EbA_RcWKRdRNlB8YU1iuWM4BnMetCQlVm8toHuuyW-TPyA?e=uu3iPR&CID=2686cea0-2d06-3304-4519-087fb9e06fd0'; const VOLUME_PATH = path.resolve('./volume/browser'); @@ -14,43 +14,16 @@ async function clearDownloadsFolder() { await fs.promises.rm('./downloads', { recursive: true, force: true }); await fs.promises.mkdir('./downloads'); } catch (err) { - console.error('Error clearing downloads folder:', err); + console.error('Error:', err); } } -function waitForFile(filename, timeout = 30000) { - return new Promise((resolve, reject) => { - const start = Date.now(); - const interval = setInterval(() => { - if (fs.existsSync(filename)) { - clearInterval(interval); - resolve(); - } else if (Date.now() - start > timeout) { - clearInterval(interval); - reject(new Error('Timeout waiting for file')); - } - }, 500); - }); -} - -function getNewestFile(dir, ext = '.xlsx') { - const files = fs.readdirSync(dir) - .filter(f => f.endsWith(ext)) - .map(f => ({ - name: f, - time: fs.statSync(path.join(dir, f)).mtime.getTime() - })) - .sort((a, b) => b.time - a.time); - return files.length ? path.join(dir, files[0].name) : null; -} - (async () => { const browser = await puppeteer.launch({ headless: 'new', userDataDir: VOLUME_PATH, args: ['--no-sandbox', '--disable-setuid-sandbox'] }); - const [page] = await browser.pages(); const downloadPath = path.resolve('./downloads'); @@ -62,91 +35,114 @@ function getNewestFile(dir, ext = '.xlsx') { downloadPath: downloadPath, }); - try { - await page.goto(SHAREPOINT_URL, { waitUntil: 'networkidle2', timeout: 60000 }); + await page.goto(SHAREPOINT_URL, { waitUntil: 'networkidle2' }); - await new Promise(r => setTimeout(r, 3000)); + await new Promise(r => setTimeout(r, 3000)); - if (page.url().includes('login.')) { - console.log('Logging in...'); + if (page.url().includes('login.')) { + console.log('Logging in...'); - await page.waitForSelector('input[type="email"]', { timeout: 10000 }); - await page.type('input[type="email"]', EMAIL, { delay: 50 }); - await page.keyboard.press('Enter'); - await new Promise(r => setTimeout(r, 2000)); + await page.waitForSelector('input[type="email"]', { timeout: 10000 }); + await page.type('input[type="email"]', EMAIL, { delay: 50 }); + await page.keyboard.press('Enter'); + await new Promise(r => setTimeout(r, 2000)); - try { - await page.waitForSelector('div[role="button"]', { timeout: 5000 }); - const signInButtons = await page.$$('div[role="button"]'); - for (const btn of signInButtons) { - const text = await page.evaluate(el => el.innerText, btn); - if (text && text.includes('with password')) { - console.log('Clicking "Sign in with password"...'); - await btn.click(); - break; - } + try { + await page.waitForSelector('div[role="button"]', { timeout: 5000 }); + const signInButtons = await page.$$('div[role="button"]'); + for (const btn of signInButtons) { + const text = await page.evaluate(el => el.innerText, btn); + if (text && text.includes('with password')) { + console.log('Clicking "Sign in with password"...'); + await btn.click(); + break; } - } catch (err) { - console.log('"Sign in with password" button not found, continuing...'); } - - await page.waitForSelector('input[type="password"]', { timeout: 100000 }); - await page.type('input[type="password"]', PASSWORD, { delay: 50 }); - await page.keyboard.press('Enter'); - - try { - await page.waitForSelector('input[type="submit"]', { timeout: 10000 }); - await page.click('input[type="submit"]'); - } catch { - console.log('No stay signed in prompt.'); - } - - await page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }); + } catch (err) { + console.log('"Sign in with password" button not found, continuing...'); } - const frameHandle = await page.waitForSelector('iframe', { timeout: 60000 }); - const frame = await frameHandle.contentFrame(); + await page.waitForSelector('input[type="password"]', { timeout: 100000 }); + await page.type('input[type="password"]', PASSWORD, { delay: 50 }); + await page.keyboard.press('Enter'); - await frame.waitForSelector('button[title="File"]', { timeout: 60000 }); + try { + await page.waitForSelector('input[type="submit"]', { timeout: 10000 }); + await page.click('input[type="submit"]'); + } catch { + console.log('No stay signed in prompt.'); + } + + // wait for navigation after login + await page.waitForNavigation({ waitUntil: 'networkidle2' }); + } + + // Wait for iframe containing file options + const frameHandle = await page.waitForSelector('iframe'); + const frame = await frameHandle.contentFrame(); + + await frame.waitForSelector('button[title="File"]', { timeout: 60000 }); + await frame.click('button[title="File"]'); + + await new Promise(r => setTimeout(r, 500)); + + // Click "Create a Copy" + try { + await frame.waitForSelector('div[role="menuitem"][name="Create a Copy"]', { visible: true }); + } catch { await frame.click('button[title="File"]'); await new Promise(r => setTimeout(r, 500)); - - try { - await frame.waitForSelector('div[role="menuitem"][name="Create a Copy"]', { visible: true }); - } catch { - await frame.click('button[title="File"]'); - await new Promise(r => setTimeout(r, 500)); - } - - await frame.click('div[role="menuitem"][name="Create a Copy"]'); - await new Promise(r => setTimeout(r, 500)); - - await frame.waitForSelector('div[role="menuitem"][name="Download a Copy"]', { visible: true }); - await frame.click('div[role="menuitem"][name="Download a Copy"]'); - - await new Promise(r => setTimeout(r, 10000)); - - const downloadedFilePath = getNewestFile(downloadPath, '.xlsx'); - if (!downloadedFilePath) { - throw new Error('No XLSX file found in download folder'); - } - - console.log('Waiting for file:', downloadedFilePath); - await waitForFile(downloadedFilePath); - - parseThisShit(downloadedFilePath); - - await clearDownloadsFolder(); - } catch (err) { - console.error('❌ Error occurred:', err.message); - const screenshotPath = `error-${Date.now()}.png`; - try { - await page.screenshot({ path: screenshotPath, fullPage: true }); - console.log(`📷 Screenshot saved to: ${screenshotPath}`); - } catch (screenshotErr) { - console.error('⚠️ Failed to capture screenshot:', screenshotErr.message); - } - } finally { - await browser.close(); } + await frame.click('div[role="menuitem"][name="Create a Copy"]'); + + await new Promise(r => setTimeout(r, 500)); + + // Click "Download a Copy" + await frame.waitForSelector('div[role="menuitem"][name="Download a Copy"]', { visible: true }); + await frame.click('div[role="menuitem"][name="Download a Copy"]'); + + // Wait some seconds for download to start + await new Promise(r => setTimeout(r, 10000)); + + // Helper: wait for file to appear in download folder + function waitForFile(filename, timeout = 30000) { + return new Promise((resolve, reject) => { + const start = Date.now(); + const interval = setInterval(() => { + if (fs.existsSync(filename)) { + clearInterval(interval); + resolve(); + } else if (Date.now() - start > timeout) { + clearInterval(interval); + reject(new Error('Timeout waiting for file')); + } + }, 500); + }); + } + + // Helper: get newest .xlsx file in downloads folder + function getNewestFile(dir, ext = '.xlsx') { + const files = fs.readdirSync(dir) + .filter(f => f.endsWith(ext)) + .map(f => ({ + name: f, + time: fs.statSync(path.join(dir, f)).mtime.getTime() + })) + .sort((a, b) => b.time - a.time); + return files.length ? path.join(dir, files[0].name) : null; + } + + // Wait for the downloaded file + const downloadedFilePath = getNewestFile(downloadPath, '.xlsx'); + if (!downloadedFilePath) { + throw new Error('No XLSX file found in download folder'); + } + console.log('Waiting for file:', downloadedFilePath); + await waitForFile(downloadedFilePath); + + parseThisShit(downloadedFilePath); + + await clearDownloadsFolder(); + + await browser.close(); })();