// ==UserScript==
// @name         Outlook Web Message Harvester
// @namespace    http://tampermonkey.net/
// @version      0.1
// @description  Iterate every message in the Inbox, capture the reading-pane text, and batch-export every 100 entries as JSON files.
// @author       Codex
// @match        file:///home/matmanna/projects/exchange-exporter/page.html*
// @match        https://outlook.office.com/*
// @grant        none
// ==/UserScript==

(function () {
  'use strict';

  // Number of captured messages written to each JSON file unless the caller overrides it.
  const DEFAULT_BATCH_SIZE = 100;
  // Pause after each scroll so the list has time to render more rows (ms).
  const SCROLL_PAUSE = 1200;
  // Polling interval used by waitFor (ms).
  const WAIT_POLL = 300;
  // Consecutive passes without any new row before the run is considered complete.
  const MAX_SCROLL_FAILURES = 4;
  // Delay before releasing a blob URL after the download click (ms).
  const REVOKE_DELAY = 1000;

  const SUBJECT_SELECTOR = '[autoid="_rp_4"], .rpHighlightSubjectClass';
  const LIST_SUBJECT_SELECTOR = '[class*="lvHighlightSubjectClass"]';

  const state = {
    running: false,
    processed: new Set(), // row ids that have already been clicked and captured
    collected: [], // messages captured since the last exported batch
    batchIndex: 0, // 1-based counter used in exported file names
    attachmentDownloads: new Set(), // not read or written anywhere in this script yet
  };

  /**
   * Resolve after the given number of milliseconds.
   * @param {number} ms
   * @returns {Promise<void>}
   */
  function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Replace characters that are unsafe in file names, collapse whitespace and
   * cap the length at 190 characters. Not called anywhere in this script yet.
   * @param {*} name
   * @returns {string} the cleaned name, or 'attachment' when name is falsy
   */
  function sanitizeFileName(name) {
    if (!name) {
      return 'attachment';
    }
    return name
      .toString()
      .replace(/[<>:\\"/|?*\u0000-\u001F]+/g, '_')
      .replace(/\s+/g, ' ')
      .trim()
      .slice(0, 190);
  }

  /**
   * Offer a blob to the user as a file download via a temporary anchor element.
   * @param {Blob} blob
   * @param {string} fileName
   */
  function triggerDownload(blob, fileName) {
    const url = URL.createObjectURL(blob);
    const anchor = document.createElement('a');
    anchor.href = url;
    anchor.download = fileName;
    document.body.appendChild(anchor);
    anchor.click();
    document.body.removeChild(anchor);
    // Revoke later rather than synchronously: releasing the URL right after
    // click() can abort the download before the browser has started reading it.
    setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY);
  }

  /**
   * Poll a (possibly async) predicate until it yields a truthy value.
   * @param {Function} predicate
   * @param {number} [timeout=20000] maximum time to keep polling, in ms
   * @returns {Promise<*>} the first truthy result, or null on timeout
   */
  async function waitFor(predicate, timeout = 20000) {
    const start = Date.now();
    while (Date.now() - start < timeout) {
      const result = await predicate();
      if (result) {
        return result;
      }
      await sleep(WAIT_POLL);
    }
    return null;
  }

  /**
   * Download a JSON string as `outlook-messages-<batchLabel>.json`.
   * @param {string} payload serialized JSON
   * @param {string} batchLabel
   */
  function downloadJson(payload, batchLabel) {
    const blob = new Blob([payload], { type: 'application/json' });
    triggerDownload(blob, `outlook-messages-${batchLabel}.json`);
  }

  /**
   * Export everything in state.collected as one JSON file and clear the buffer.
   * Does nothing when the buffer is empty.
   */
  function flushBatch() {
    if (state.collected.length === 0) {
      return;
    }
    state.batchIndex += 1;
    const stamp = new Date().toISOString().replace(/[:.]/g, '-');
    downloadJson(JSON.stringify(state.collected, null, 2), `${state.batchIndex}-${stamp}`);
    state.collected = [];
  }

  /**
   * Resolve the identifier of a list row: an explicit id on the descriptor,
   * then the data-convid attribute, then the element id.
   * @param {{id?: string, wrapper?: Element, row?: Element}} meta
   * @returns {string|undefined}
   */
  function getRowId(meta) {
    if (meta?.id) {
      return meta.id;
    }
    return (
      meta?.wrapper?.dataset?.convid ||
      meta?.row?.dataset?.convid ||
      meta?.wrapper?.id ||
      meta?.row?.id
    );
  }

  /**
   * Build the exported record for one message from its list row and the
   * reading-pane capture.
   * @param {{row?: Element}} meta row descriptor from getMessageRows
   * @param {string|null} readingPaneSubject
   * @param {{text: string|null, html: string|null}} readingPaneBody
   * @returns {object}
   */
  function hydrateMessage(meta, readingPaneSubject, readingPaneBody) {
    const row = meta.row;
    const textOf = (selector) => row?.querySelector(selector)?.innerText?.trim();
    const subjectFromList = textOf(LIST_SUBJECT_SELECTOR);
    const sender = textOf('[class*="lvHighlightFromClass"]');
    const time = textOf('._lvv_M');
    const snippet = textOf('._lvv_O span') || textOf('._lvv_N span');
    return {
      id: getRowId(meta),
      subject: readingPaneSubject || subjectFromList || null,
      sender: sender || null,
      listSnippet: snippet || null,
      listTime: time || null,
      paneBodyText: readingPaneBody?.text || null,
      paneBodyHtml: readingPaneBody?.html || null,
      paneSubject: readingPaneSubject || null,
      capturedAt: new Date().toISOString(),
    };
  }

  /**
   * Read the subject currently shown in the reading pane.
   * @returns {string|null}
   */
  function readPaneSubject() {
    return document.querySelector(SUBJECT_SELECTOR)?.innerText?.trim() || null;
  }

  /**
   * Wait for the reading pane to show the expected message, then capture it.
   * @param {string} [expectedSubject] subject text taken from the list row
   * @returns {Promise<{subject: string|null, text: string|null, html: string|null}>}
   */
  async function captureReadingPane(expectedSubject) {
    await waitFor(() => {
      const currentSubject = readPaneSubject();
      if (!currentSubject) {
        return false;
      }
      if (!expectedSubject) {
        return currentSubject;
      }
      // List and pane may show slightly different renderings of the subject,
      // so containment in either direction counts as a match.
      const matches =
        currentSubject === expectedSubject ||
        expectedSubject.includes(currentSubject) ||
        currentSubject.includes(expectedSubject);
      return matches ? currentSubject : false;
    }, 10000);

    // Look the body up only after the wait: a node fetched earlier may belong
    // to the previously opened message or may not have been rendered yet.
    const bodyEl = document.getElementById('Item.MessageUniqueBody');
    return {
      subject: readPaneSubject(),
      text: bodyEl?.innerText?.trim() || null,
      html: bodyEl?.innerHTML || null,
    };
  }

  /**
   * Scroll a row into view, click it and give the page a moment to react.
   * @param {Element} row
   */
  async function clickRow(row) {
    row.scrollIntoView({ block: 'center', inline: 'nearest' });
    row.click();
    await sleep(400);
  }

  /**
   * Collect the message rows currently rendered and visible in the list.
   * @returns {Array<{wrapper: Element, row: Element, id: (string|undefined)}>}
   */
  function getMessageRows() {
    const rows = [];
    for (const wrapper of document.querySelectorAll('[data-convid]')) {
      const rowElement = wrapper.querySelector('[role="option"]') || wrapper;
      if (rowElement.getAttribute('aria-hidden') === 'true') {
        continue;
      }
      if (rowElement.offsetWidth === 0 || rowElement.offsetHeight === 0) {
        continue;
      }
      rows.push({ wrapper, row: rowElement, id: getRowId({ wrapper, row: rowElement }) });
    }
    return rows;
  }

  /**
   * Find the scrollable ancestor of a row, falling back to the document root.
   * @param {Element} sampleRow
   * @returns {Element}
   */
  function getScrollContainer(sampleRow) {
    return sampleRow?.closest('[class*="scrollContainer"]') || document.documentElement;
  }

  /**
   * Scroll the container to its end and wait for new rows to render.
   * @param {Element} container
   */
  async function scrollToBottom(container) {
    container.scrollTop = container.scrollHeight;
    await sleep(SCROLL_PAUSE);
  }

  /**
   * Open one row, capture its reading pane and buffer the result; export the
   * buffer once it reaches batchSize entries.
   * @param {{wrapper: Element, row: Element, id?: string}} meta
   * @param {number} batchSize
   */
  async function processRow(meta, batchSize) {
    const rowId = getRowId(meta);
    if (!rowId || state.processed.has(rowId)) {
      return;
    }
    // A row removed from the DOM since it was listed cannot be opened; leave it
    // unmarked so it is picked up again if it is rendered later.
    if (!meta.row?.isConnected) {
      return;
    }
    state.processed.add(rowId);
    const subjectFromList = meta.row.querySelector(LIST_SUBJECT_SELECTOR)?.innerText?.trim();
    await clickRow(meta.row);
    const pane = await captureReadingPane(subjectFromList);
    state.collected.push(hydrateMessage(meta, pane.subject, pane));
    if (state.collected.length >= batchSize) {
      flushBatch();
    }
  }

  /**
   * Walk the whole message list, capturing every message and exporting JSON
   * batches. Stops after MAX_SCROLL_FAILURES consecutive passes that reveal
   * no new rows. Any remaining messages are exported when the run ends.
   * @param {number} [batchSize=DEFAULT_BATCH_SIZE] messages per exported file;
   *   values that are not a number >= 1 fall back to the default
   */
  async function runExtraction(batchSize = DEFAULT_BATCH_SIZE) {
    if (state.running) {
      console.log('[OutlookExtractor] already running.');
      return;
    }
    const requestedSize = Number(batchSize);
    const effectiveBatchSize =
      Number.isFinite(requestedSize) && requestedSize >= 1 ? requestedSize : DEFAULT_BATCH_SIZE;

    state.running = true;
    try {
      const firstRowWrapper = await waitFor(() => document.querySelector('[data-convid]'), 30000);
      if (!firstRowWrapper) {
        console.warn('[OutlookExtractor] message pane never appeared.');
        return;
      }
      const baseContainer = getScrollContainer(
        firstRowWrapper.querySelector('[role="option"]') || firstRowWrapper,
      );
      let scrollFailures = 0;
      while (true) {
        // Rows without an id are skipped by processRow, so they must not count
        // as pending work or the loop would never reach its stop condition.
        const unprocessed = getMessageRows().filter((row) => {
          const id = getRowId(row);
          return Boolean(id) && !state.processed.has(id);
        });
        if (unprocessed.length === 0) {
          scrollFailures += 1;
        } else {
          scrollFailures = 0;
        }
        for (const row of unprocessed) {
          await processRow(row, effectiveBatchSize);
        }
        if (scrollFailures >= MAX_SCROLL_FAILURES) {
          break;
        }
        await scrollToBottom(baseContainer);
      }
    } catch (error) {
      console.error('[OutlookExtractor] failed', error);
    } finally {
      // Export whatever is left, including after a failure.
      flushBatch();
      state.running = false;
      console.log('[OutlookExtractor] finished.');
    }
  }

  // Console API for starting a run manually and checking its status.
  window.outlookEmailExtractor = {
    start: (batchSize) => runExtraction(batchSize),
    isRunning: () => state.running,
  };

  // start automatically once the page settles
  setTimeout(() => runExtraction(), 5000);
})();