// ==UserScript==
// @name PDF Grabber (Direct + Image-Viewer Fallback)
// @namespace userscripts
// @version 1.1.0
// @description Floating button. Tries to find a direct PDF on the page; if none, falls back to harvesting a paged image viewer (Scribd-style). Three output modes: 'print' (browser Save-as-PDF; preserves text layer; default), 'pdf' (image-only PDF via jsPDF), or 'images' (each page image saved as a separate file).
// @author whoever
// @match *://*.scribd.com/*
// @grant GM_xmlhttpRequest
// @grant GM_download
// @grant unsafeWindow
// @connect *
// @require https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js
// @run-at document-idle
// ==/UserScript==
(function () {
'use strict';
// ============================================================
// CONFIG
// ============================================================
// Settings read by the functions below. Edit the values here to adapt the
// script; nothing in this object is computed at runtime.
const CONFIG = {
// Output mode used by the image-viewer fallback:
// 'print' = build a clean iframe of all pages and call window.print()
// => preserves text layer (selectable text in resulting PDF)
// => requires user to pick "Save as PDF" in the print dialog
// 'pdf' = stitch image_layer images into a single PDF via jsPDF
// => fully automatic, no dialog
// => image-only, NO text in result
// 'images' = save each image as a separate file
// Any value other than 'print' or 'images' is handled like 'pdf'.
OUTPUT_MODE: 'print',
// Selectors for the paged image viewer.
// Page container; its presence is also how the viewer is detected.
IMAGE_VIEWER_PAGE_SELECTOR: '.outer_page',
// Element inside a page whose inline width/height give the print page size.
INNER_PAGE_SELECTOR : '.newpage',
// Page images harvested by the 'pdf' and 'images' modes.
IMAGE_VIEWER_IMG_SELECTOR : '.absimg, .image_layer img',
// CSS selectors of in-page promo / ad blocks to strip before printing.
PRINT_HIDE_SELECTORS: [
// Scribd's between-page promo div uses an obfuscated class name.
'[class*="vxxhmpejhe"]',
// Generic ad slots
'[id*="ad-"]', '[id*="ad_"]', '[class*="advert"]',
],
// Pause after jumping/scrolling to each page while loading the document.
PAGE_LOAD_WAIT_MS: 800,
// Extra pause once every page has been visited.
FINAL_SETTLE_MS: 1500,
// Button opacity when idle and while hovered.
BUTTON_OPACITY_IDLE: 0.25,
BUTTON_OPACITY_HOVER: 1.0,
// Fixed position of the floating button.
BUTTON_POSITION: { top: '8px', left: '8px' },
// Base name for saved files; when empty, document.title (then 'document') is used.
FILENAME: '',
// When true, log() messages are printed; warnings are printed regardless.
DEBUG: true,
};
// ============================================================
// Console helpers. Every message carries the same tag so the script's output
// is easy to filter. log() is silent unless CONFIG.DEBUG is truthy; warn()
// always prints.
const LOG_TAG = '[PDF-Grabber]';
const log = (...args) => CONFIG.DEBUG && console.log(LOG_TAG, ...args);
const warn = (...args) => console.warn(LOG_TAG, ...args);
// ---------- 1. UI ----------
/**
 * Creates the floating "⤓ PDF" button and appends it to document.body.
 * Returns without doing anything if the button already exists.
 */
function buildButton() {
  const BUTTON_ID = '__pdf_grabber_btn__';
  if (document.getElementById(BUTTON_ID)) return;

  const baseStyle = {
    position: 'fixed',
    top: CONFIG.BUTTON_POSITION.top,
    left: CONFIG.BUTTON_POSITION.left,
    zIndex: 2147483647,
    padding: '6px 10px',
    fontSize: '12px',
    fontFamily: 'sans-serif',
    background: 'rgba(0,0,0,0.6)',
    color: '#fff',
    border: '1px solid rgba(255,255,255,0.4)',
    borderRadius: '4px',
    cursor: 'pointer',
    opacity: String(CONFIG.BUTTON_OPACITY_IDLE),
    transition: 'opacity 0.2s',
  };

  const button = document.createElement('button');
  button.id = BUTTON_ID;
  button.textContent = '⤓ PDF';
  button.title = 'PDF Grabber — click to download';
  Object.assign(button.style, baseStyle);

  // The opacity is looked up when the event fires, not when the listener is
  // registered, so it always reflects the current CONFIG value.
  const fadeTo = (configKey) => () => {
    button.style.opacity = String(CONFIG[configKey]);
  };
  button.addEventListener('mouseenter', fadeTo('BUTTON_OPACITY_HOVER'));
  button.addEventListener('mouseleave', fadeTo('BUTTON_OPACITY_IDLE'));
  button.addEventListener('click', onClick);

  document.body.appendChild(button);
}
/**
 * Updates the floating button's label and enabled state.
 * A disabled button is drawn at 0.6 opacity; an enabled one returns to the
 * configured idle opacity. Does nothing if the button is not in the page.
 *
 * @param {string} text - New button label.
 * @param {*} disabled - Truthy to disable the button, falsy to enable it.
 */
function setBtn(text, disabled) {
  const button = document.getElementById('__pdf_grabber_btn__');
  if (!button) {
    return;
  }
  const isDisabled = Boolean(disabled);
  button.textContent = text;
  button.disabled = isDisabled;
  if (isDisabled) {
    button.style.opacity = '0.6';
  } else {
    button.style.opacity = String(CONFIG.BUTTON_OPACITY_IDLE);
  }
}
/**
 * Schedules the button to return to its idle "⤓ PDF" label, enabled,
 * three seconds from now.
 */
function resetBtnLater() {
  const RESET_DELAY_MS = 3000;
  setTimeout(function restoreIdleLabel() {
    setBtn('⤓ PDF', false);
  }, RESET_DELAY_MS);
}
// ---------- 2. Click handler ----------
/**
 * Click handler for the floating button.
 *
 * Order of attempts:
 *   (A) a direct PDF referenced by the page (findDirectPdf);
 *   (B) the paged image viewer: visit every page, then either print a clean
 *       iframe ('print'), save each image ('images'), or stitch the images
 *       into a single PDF (any other OUTPUT_MODE value).
 *
 * The button stays disabled for the whole run so a second click cannot start
 * an overlapping one; it is re-enabled only on completion, on an early exit,
 * or on error. Any thrown error is logged, shown via alert(), and the button
 * is reset.
 *
 * @returns {Promise<void>}
 */
async function onClick() {
  // Every in-progress label must pass disabled=true: setBtn() treats a
  // missing flag as "enabled", which would make the button clickable mid-run.
  const busy = (label) => setBtn(label, true);
  try {
    busy('… searching');
    // (A) Direct PDF on the page — the genuinely universal case.
    const direct = findDirectPdf();
    if (direct) {
      log('Direct PDF found:', direct);
      busy('… downloading');
      downloadDirect(direct);
      setBtn('✓ done', false);
      return resetBtnLater();
    }
    // (B) Image-viewer fallback (Scribd-shaped).
    log('No direct PDF. Trying image-viewer fallback.');
    if (!hasImageViewer()) {
      alert('PDF Grabber: no PDF and no recognised image viewer on this page.');
      setBtn('⤓ PDF', false);
      return;
    }
    // Prevent Scribd from evicting off-screen pages while we scroll.
    disablePageEviction();
    busy('… loading pages');
    await scrollAllPages();
    if (CONFIG.OUTPUT_MODE === 'print') {
      busy('… preparing print');
      await openPrintIframe();
      setBtn('✓ done', false);
      return resetBtnLater();
    }
    // 'pdf' or 'images' modes — image-only path
    busy('… collecting');
    const urls = collectImageUrls();
    log(`Collected ${urls.length} image URLs.`);
    if (!urls.length) {
      alert('PDF Grabber: image viewer detected but 0 images collected. Page selectors may need adjusting.');
      setBtn('⤓ PDF', false);
      return;
    }
    if (CONFIG.OUTPUT_MODE === 'images') {
      busy('… downloading');
      await downloadEachImage(urls);
    } else {
      busy(`… fetching 0/${urls.length}`);
      const blobs = await fetchAllAsDataURLs(urls, (i, n) => busy(`… fetching ${i}/${n}`));
      busy('… building PDF');
      await buildPdf(blobs);
    }
    setBtn('✓ done', false);
    resetBtnLater();
  } catch (err) {
    console.error('[PDF-Grabber] fatal:', err);
    alert('PDF Grabber error: ' + (err.message || err));
    setBtn('⚠ error', false);
    resetBtnLater();
  }
}
// ---------- 3. Direct PDF detection ----------
/**
 * Looks for a PDF the page references directly.
 *
 * Checks, in order: the current URL itself, then <embed>/<object>/<iframe>
 * elements that point at or declare a PDF, then any link whose href contains
 * ".pdf". Candidates whose URL cannot be parsed are skipped rather than
 * aborting the search, so one malformed attribute cannot prevent the caller
 * from reaching its fallback.
 *
 * @returns {string|null} Absolute URL of the first usable candidate, or null.
 */
function findDirectPdf() {
  if (/\.pdf(\?|#|$)/i.test(location.pathname + location.search)) return location.href;

  // Resolve against the document base URI so a <base href> is honoured.
  const base = document.baseURI || location.href;
  const toAbsolute = (raw) => {
    if (!raw) return null;
    try {
      return new URL(raw, base).href;
    } catch (_) {
      // new URL() throws TypeError on malformed input; treat as "not a candidate".
      return null;
    }
  };

  const embedSelectors = [
    'embed[src*=".pdf" i]', 'embed[type="application/pdf"]',
    'object[data*=".pdf" i]', 'object[type="application/pdf"]',
    'iframe[src*=".pdf" i]',
  ];
  for (const selector of embedSelectors) {
    for (const el of document.querySelectorAll(selector)) {
      const resolved = toAbsolute(el.getAttribute('src') || el.getAttribute('data'));
      if (resolved) return resolved;
    }
  }

  for (const link of document.querySelectorAll('a[href*=".pdf" i]')) {
    const resolved = toAbsolute(link.getAttribute('href'));
    if (resolved) return resolved;
  }
  return null;
}
/**
 * Starts a download of `url` by clicking a temporary <a download> link.
 * The link opens in a new tab with rel="noopener" and is removed from the
 * page straight after the click.
 *
 * @param {string} url - URL of the PDF to download.
 */
function downloadDirect(url) {
  const link = document.createElement('a');
  link.href = url;
  link.download = filename('.pdf');
  link.target = '_blank';
  link.rel = 'noopener';

  document.body.appendChild(link);
  link.click();
  link.remove();
}
// ---------- 4. Image-viewer detection & loading ----------
/**
 * Reports whether the page contains at least one page container of the
 * paged image viewer.
 *
 * @returns {boolean}
 */
function hasImageViewer() {
  const firstPage = document.querySelector(CONFIG.IMAGE_VIEWER_PAGE_SELECTOR);
  return firstPage !== null && firstPage !== undefined;
}
/**
 * Patches the page's DocumentManager, if there is one, so that pages which
 * have been loaded stay loaded:
 *   - _removeUnusedPages becomes a no-op;
 *   - _loadAdjacentPages runs with `this.mobile` temporarily set to false and
 *     restores the previous value afterwards, even if the call throws.
 * Failures while patching are reported as a warning, never thrown.
 */
function disablePageEviction() {
  const manager = unsafeWindow.DocumentManager || window.DocumentManager;
  if (!manager) {
    return;
  }
  try {
    if (typeof manager._removeUnusedPages === 'function') {
      manager._removeUnusedPages = function () {};
    }

    const loadAdjacent = manager._loadAdjacentPages;
    if (typeof loadAdjacent === 'function') {
      // A regular function (not an arrow) so `this` is whatever object the
      // page invokes the method on.
      manager._loadAdjacentPages = function (...args) {
        const wasMobile = this.mobile;
        this.mobile = false;
        try {
          return loadAdjacent.apply(this, args);
        } finally {
          this.mobile = wasMobile;
        }
      };
    }
    log('Page eviction disabled.');
  } catch (e) {
    warn('Could not disable page eviction:', e);
  }
}
/**
 * Visits every page of the viewer so that each one gets loaded.
 *
 * If a DocumentManager with a gotoPage() method is available, it jumps through
 * pages 1..N and then back to page 1. Otherwise it scrolls each page container
 * into view and finally scrolls the window back to the top. It pauses
 * PAGE_LOAD_WAIT_MS after each page and FINAL_SETTLE_MS at the end.
 *
 * @returns {Promise<void>}
 */
async function scrollAllPages() {
  const manager = unsafeWindow.DocumentManager || window.DocumentManager;
  const pageTotal = countPages();
  log('Page count:', pageTotal);

  const canJump = Boolean(manager) && typeof manager.gotoPage === 'function';
  if (!canJump) {
    log('No DocumentManager — scrolling manually.');
    const pageNodes = document.querySelectorAll(CONFIG.IMAGE_VIEWER_PAGE_SELECTOR);
    for (const node of pageNodes) {
      node.scrollIntoView({ block: 'center', behavior: 'instant' });
      await sleep(CONFIG.PAGE_LOAD_WAIT_MS);
    }
    window.scrollTo(0, 0);
  } else {
    log('Using DocumentManager.gotoPage()');
    let pageNo = 1;
    while (pageNo <= pageTotal) {
      try {
        manager.gotoPage(pageNo);
      } catch (e) {
        warn('gotoPage failed:', e);
      }
      await sleep(CONFIG.PAGE_LOAD_WAIT_MS);
      pageNo += 1;
    }
    // Returning to the first page is best-effort; a failure here is ignored.
    try { manager.gotoPage(1); } catch (_) {}
  }

  await sleep(CONFIG.FINAL_SETTLE_MS);
}
/**
 * Estimates how many pages the document has.
 *
 * Takes the largest number found in the ids of elements whose id starts with
 * "outer_page_". If no such id yields a number, it falls back to counting the
 * page containers.
 *
 * @returns {number}
 */
function countPages() {
  let highest = -Infinity;
  let sawNumber = false;
  for (const el of document.querySelectorAll('[id^="outer_page_"]')) {
    const digitsOnly = el.id.replace(/\D+/g, '');
    const pageNo = parseInt(digitsOnly, 10);
    if (Number.isNaN(pageNo)) continue;
    sawNumber = true;
    if (pageNo > highest) highest = pageNo;
  }
  if (sawNumber) {
    return highest;
  }
  return document.querySelectorAll(CONFIG.IMAGE_VIEWER_PAGE_SELECTOR).length;
}
/**
 * Gathers the URLs of the viewer's page images in document order.
 *
 * For each image the first non-empty of currentSrc, src and the "orig"
 * attribute is used. Images with no URL, data: URLs and repeats of a URL
 * already collected are left out.
 *
 * @returns {string[]} Unique image URLs.
 */
function collectImageUrls() {
  const unique = new Set();
  const images = Array.from(document.querySelectorAll(CONFIG.IMAGE_VIEWER_IMG_SELECTOR));
  images.forEach((img) => {
    const candidate = img.currentSrc || img.src || img.getAttribute('orig');
    const usable = Boolean(candidate) && !candidate.startsWith('data:');
    if (usable) {
      // A Set keeps insertion order and ignores repeated additions.
      unique.add(candidate);
    }
  });
  return [...unique];
}
// ---------- 5. PRINT MODE: build clean iframe, then print ----------
/**
 * PRINT MODE: copies every page container into a hidden iframe, together with
 * the parent page's stylesheets and a print stylesheet, then opens the print
 * dialog on that iframe.
 *
 * The print page size is taken from the inline width/height of the first
 * page's inner element, defaulting to 902 × 1275 px. The iframe is removed
 * when the function finishes, whether it completed or threw, and an iframe
 * left behind by an earlier run is removed first so the id stays unique.
 *
 * @returns {Promise<void>}
 * @throws {Error} If no page containers are found or the iframe document
 *   cannot be accessed.
 */
async function openPrintIframe() {
  const IFRAME_ID = '__pdf_grabber_print_iframe__';
  const pages = [...document.querySelectorAll(CONFIG.IMAGE_VIEWER_PAGE_SELECTOR)];
  if (!pages.length) throw new Error('No outer_page elements found.');
  // Determine print page size from the inner .newpage of the first page.
  const firstInner = pages[0].querySelector(CONFIG.INNER_PAGE_SELECTOR);
  const pageW = parsePx(firstInner && firstInner.style.width) || 902;
  const pageH = parsePx(firstInner && firstInner.style.height) || 1275;
  log(`Print page size: ${pageW} × ${pageH}px`);
  // Remove a leftover iframe from a previous run before creating a new one.
  const stale = document.getElementById(IFRAME_ID);
  if (stale) stale.remove();
  // Create offscreen iframe.
  const iframe = document.createElement('iframe');
  iframe.id = IFRAME_ID;
  Object.assign(iframe.style, {
    position: 'fixed', top: '0', left: '0',
    width: '0', height: '0', border: 'none', opacity: '0',
    pointerEvents: 'none', zIndex: '-1',
  });
  document.body.appendChild(iframe);
  try {
    const doc = iframe.contentDocument;
    if (!doc) throw new Error('Print iframe document is not accessible.');
    doc.open();
    doc.write('<!DOCTYPE html><html><head><meta charset="utf-8"><title>');
    doc.write(escapeHtml(filename('')));
    doc.write('</title>');
    // Copy ALL stylesheets and inline styles from parent so Scribd's
    // positioning rules (.text_layer transform, .absimg, fonts, etc.)
    // are preserved.
    document.querySelectorAll('head style, head link[rel="stylesheet"]').forEach(el => {
      doc.write(el.outerHTML);
    });
    // Print-specific CSS: page size, page breaks, hide promos.
    const hideSelectors = CONFIG.PRINT_HIDE_SELECTORS.join(',\n');
    doc.write(`
<style id="__pdf_grabber_print_css__">
@page { size: ${pageW}px ${pageH}px; margin: 0; }
html, body { margin: 0 !important; padding: 0 !important; background: #fff !important; }
/* Strip Scribd's between-page promo blocks and ads */
${hideSelectors} { display: none !important; }
/* But keep the page containers visible even if they share a class pattern */
.outer_page {
display: block !important;
width: ${pageW}px !important;
height: ${pageH}px !important;
margin: 0 !important;
padding: 0 !important;
border: none !important;
box-shadow: none !important;
page-break-after: always !important;
break-after: page !important;
overflow: hidden !important;
position: relative !important;
}
.outer_page:last-child {
page-break-after: auto !important;
break-after: auto !important;
}
.newpage {
display: block !important;
transform: none !important;
transform-origin: top left !important;
width: ${pageW}px !important;
height: ${pageH}px !important;
position: relative !important;
}
.absimg { display: block !important; }
</style>
`);
    doc.write('</head><body>');
    // Clone every page into the iframe.
    for (const p of pages) {
      const clone = p.cloneNode(true);
      clone.classList.remove('not_visible');
      clone.classList.remove('blurred_page');
      const inner = clone.querySelector(CONFIG.INNER_PAGE_SELECTOR);
      if (inner) inner.style.display = 'block';
      doc.write(clone.outerHTML);
    }
    doc.write('</body></html>');
    doc.close();
    // Wait for images to actually load in the iframe.
    await waitForIframeReady(iframe, 30000);
    // Print.
    try {
      iframe.contentWindow.focus();
      iframe.contentWindow.print();
    } catch (e) {
      warn('iframe.print() threw, falling back to window.print():', e);
      window.print();
    }
    // Keep the iframe around briefly after print() returns.
    await sleep(1500);
  } finally {
    // Runs on success and on failure, so the hidden iframe never outlives the call.
    iframe.remove();
  }
}
/**
 * Resolves once the iframe's document reports readyState "complete" and every
 * <img> in it has finished loading with a non-zero natural width. It checks
 * every 200 ms. If that has not happened within `timeoutMs`, it logs a
 * warning and resolves anyway; the promise never rejects.
 *
 * @param {HTMLIFrameElement} iframe - Iframe to watch.
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
function waitForIframeReady(iframe, timeoutMs) {
  const POLL_INTERVAL_MS = 200;

  const everythingLoaded = () => {
    const doc = iframe.contentDocument;
    if (!doc || doc.readyState !== 'complete') {
      return false;
    }
    return Array.from(doc.querySelectorAll('img'))
      .every((img) => img.complete && img.naturalWidth > 0);
  };

  return new Promise((resolve) => {
    const startedAt = Date.now();
    (function poll() {
      if (everythingLoaded()) {
        resolve();
        return;
      }
      if (Date.now() - startedAt > timeoutMs) {
        warn('Iframe not fully loaded within timeout, printing anyway.');
        resolve();
        return;
      }
      setTimeout(poll, POLL_INTERVAL_MS);
    })();
  });
}
// ---------- 6. IMAGE-ONLY modes (unchanged from v1.0) ----------
/**
 * 'images' mode: saves every URL as its own file, one after another.
 *
 * Files are named "<base>_page_NNN.<ext>". The extension is taken from the
 * URL when it ends in jpg/jpeg/png/webp (optionally followed by a query
 * string); otherwise "jpg" is used. GM_download is used when available, a
 * temporary <a download> link if not. A failure for one image is logged and
 * the loop carries on. There is a 150 ms pause after each image.
 *
 * @param {string[]} urls - Image URLs in page order.
 * @returns {Promise<void>}
 */
async function downloadEachImage(urls) {
  const EXT_PATTERN = /\.(jpg|jpeg|png|webp)(\?|$)/i;
  let pageNo = 0;
  for (const url of urls) {
    pageNo += 1;
    const match = url.match(EXT_PATTERN);
    const ext = match ? match[1] : 'jpg';
    const name = `${filename('')}_page_${String(pageNo).padStart(3, '0')}.${ext}`;
    try {
      if (typeof GM_download !== 'function') {
        const link = document.createElement('a');
        link.href = url;
        link.download = name;
        document.body.appendChild(link);
        link.click();
        link.remove();
      } else {
        GM_download({ url, name, saveAs: false });
      }
    } catch (e) {
      warn('download failed:', url, e);
    }
    await sleep(150);
  }
}
/**
 * Fetches every URL in turn via fetchAsDataURL.
 *
 * The result has one entry per input URL, in the same order. A URL that could
 * not be fetched is logged and recorded as null.
 *
 * @param {string[]} urls - Image URLs to fetch.
 * @param {function(number, number): void} [onProgress] - Called after each
 *   URL with (number processed so far, total).
 * @returns {Promise<Array<object|null>>}
 */
async function fetchAllAsDataURLs(urls, onProgress) {
  const results = [];
  let processed = 0;
  for (const url of urls) {
    let item = null;
    try {
      item = await fetchAsDataURL(url);
    } catch (e) {
      warn('fetch failed for', url, e);
    }
    results.push(item);
    processed += 1;
    if (onProgress) {
      onProgress(processed, urls.length);
    }
  }
  return results;
}
/**
 * Downloads `url` as a blob with GM_xmlhttpRequest and converts it to a data
 * URL with a FileReader.
 *
 * @param {string} url - Resource to fetch.
 * @returns {Promise<{dataUrl: string, blob: *}>} The data URL and the raw
 *   response body.
 * Rejects on an HTTP status of 400 or above, on a network error or timeout,
 * or if the FileReader reports an error.
 */
function fetchAsDataURL(url) {
  return new Promise((resolve, reject) => {
    const handleLoad = (resp) => {
      if (resp.status >= 400) {
        reject(new Error('HTTP ' + resp.status));
        return;
      }
      const reader = new FileReader();
      reader.onerror = () => reject(reader.error);
      reader.onloadend = () => resolve({ dataUrl: reader.result, blob: resp.response });
      reader.readAsDataURL(resp.response);
    };
    const handleError = (e) => {
      reject(new Error('network error: ' + (e && e.error)));
    };
    const handleTimeout = () => {
      reject(new Error('timeout'));
    };

    GM_xmlhttpRequest({
      method: 'GET',
      url,
      responseType: 'blob',
      onload: handleLoad,
      onerror: handleError,
      ontimeout: handleTimeout,
    });
  });
}
/**
 * 'pdf' mode: writes one PDF page per fetched image and saves the file.
 *
 * Each page is sized to its image in pixels, landscape when the image is
 * wider than it is tall. Null entries (failed fetches) are skipped.
 *
 * @param {Array<{dataUrl: string}|null>} items - Fetched images in page order.
 * @returns {Promise<void>}
 * @throws {Error} If jsPDF is not available or no image could be added.
 */
async function buildPdf(items) {
  const jsPdfFrom = (scope) => scope.jspdf && scope.jspdf.jsPDF;
  const jsPDF = jsPdfFrom(window) || jsPdfFrom(unsafeWindow);
  if (!jsPDF) throw new Error('jsPDF not loaded — check @require URL.');

  const orientationOf = (w, h) => (w > h ? 'l' : 'p');
  let pdf = null;
  for (const item of items) {
    if (!item) continue;
    const { w, h } = await imageSize(item.dataUrl);
    const orientation = orientationOf(w, h);
    if (pdf) {
      pdf.addPage([w, h], orientation);
    } else {
      // The first image creates the document, so its size defines page one.
      pdf = new jsPDF({ orientation, unit: 'px', format: [w, h] });
    }
    const isPng = /^data:image\/png/i.test(item.dataUrl);
    pdf.addImage(item.dataUrl, isPng ? 'PNG' : 'JPEG', 0, 0, w, h, undefined, 'FAST');
  }

  if (!pdf) throw new Error('No images successfully fetched.');
  pdf.save(filename('.pdf'));
}
/**
 * Loads `dataUrl` into an Image to read its natural dimensions.
 *
 * @param {string} dataUrl - Image source to measure.
 * @returns {Promise<{w: number, h: number}>} Natural width and height.
 *   Rejects with whatever the image's error handler receives.
 */
function imageSize(dataUrl) {
  return new Promise((resolve, reject) => {
    const probe = new Image();
    probe.onerror = reject;
    probe.onload = () => {
      const { naturalWidth, naturalHeight } = probe;
      resolve({ w: naturalWidth, h: naturalHeight });
    };
    // Handlers are attached before src is set so a load cannot be missed.
    probe.src = dataUrl;
  });
}
// ---------- 7. Helpers ----------
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Converts a CSS length such as "902px" to a number.
 * Falsy input gives 0; text with no leading number gives NaN.
 * @param {*} v - Value to parse, typically an inline style length.
 * @returns {number}
 */
function parsePx(v) {
  if (!v) {
    return 0;
  }
  const withoutUnit = String(v).replace('px', '');
  return parseFloat(withoutUnit);
}
/**
 * Escapes the five HTML-significant characters (& < > " ') so that `s` can be
 * written into markup as plain text.
 * @param {*} s - Value to escape; it is converted to a string first.
 * @returns {string}
 */
function escapeHtml(s) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  return String(s).replace(/[&<>"']/g, (ch) => entities[ch]);
}
/**
 * Builds a file name for saved output.
 *
 * The base is CONFIG.FILENAME, else the document title, else "document".
 * Runs of characters that are not allowed in file names, and runs of
 * whitespace, are each replaced by a single underscore. The base is then cut
 * to 120 characters before the suffix is added.
 *
 * @param {string} suffix - Text appended to the base, e.g. ".pdf" or "".
 * @returns {string}
 */
function filename(suffix) {
  const rawBase = CONFIG.FILENAME || document.title || 'document';
  const withoutReserved = rawBase.replace(/[\\/:*?"<>|]+/g, '_');
  const withoutSpaces = withoutReserved.replace(/\s+/g, '_');
  const trimmed = withoutSpaces.slice(0, 120);
  return trimmed + suffix;
}
// ---------- 8. Boot ----------
// Add the button straight away if the DOM has already been parsed; otherwise
// wait for DOMContentLoaded.
if (document.readyState !== 'loading') {
  buildButton();
} else {
  document.addEventListener('DOMContentLoaded', buildButton);
}
})();