Crowdmark Downloader
Download all your graded Crowdmark assessments as PDFs through an existing Chrome session.
Get the Script
Download Script
crowdmark_downloader.py
#!/usr/bin/env python3
"""
Download Crowdmark student assessments as PDFs through an existing Chrome CDP session.
Chrome must already be running with remote debugging enabled. The script reuses the
existing browser context, waits for manual login when needed, and saves PDFs with:
{YYYY}{X}_{CourseCode}_{CourseName} - {AssessmentTitle}.pdf
"""
from __future__ import annotations
import argparse
import asyncio
import os
import re
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
from urllib.parse import urljoin, urlparse
from playwright.async_api import Page, TimeoutError as PlaywrightTimeoutError
from playwright.async_api import async_playwright
try:
from tqdm import tqdm
except ImportError:
tqdm = None
# Student course listing on Crowdmark. It is the first page opened to detect
# whether a sign-in is required, and one of the two listings that run() scrapes.
ACTIVE_COURSES_URL = "https://app.crowdmark.com/student/courses"
# Course-archive listing; scraped by run() after the active listing.
ARCHIVED_COURSES_URL = "https://app.crowdmark.com/student/course-archive"
# Folder name for the saved PDFs. Presumably the default for the output-dir
# option; the argument parser is not visible here, so confirm against it.
DEFAULT_OUTPUT_DIR = "crowdmark_assessment_pdfs"
async def wait_for_authentication(page: Page) -> None:
    """Open the active-courses page and return once the session is signed in.

    If the navigation does not end up on a sign-in/login URL, the function
    waits for the network to go idle (up to 30 s) and returns. Otherwise it
    prints a prompt, waits up to ten minutes for the URL to leave the sign-in
    flow, then waits for the network to go idle (up to 60 s).

    Args:
        page: Playwright page used for the navigation and the waits.

    Raises:
        TimeoutError: If either wait in the manual-login branch times out.
            The underlying Playwright timeout is attached as ``__cause__``.
    """

    def on_login_page(url: str) -> bool:
        # The sign-in flow is recognised purely by these URL fragments.
        return "/sign-in" in url or "/login" in url

    await page.goto(ACTIVE_COURSES_URL, wait_until="domcontentloaded")
    if not on_login_page(page.url):
        await page.wait_for_load_state("networkidle", timeout=30_000)
        return

    print("Crowdmark sign-in is open. Please log in there; I will continue automatically.")
    try:
        await page.wait_for_url(
            lambda url: not on_login_page(url),
            timeout=10 * 60 * 1000,  # ten minutes for the user to log in
        )
        await page.wait_for_load_state("networkidle", timeout=60_000)
    except PlaywrightTimeoutError as err:
        # Chain the cause so the traceback shows which wait actually expired.
        raise TimeoutError("Timed out waiting for Crowdmark login.") from err
async def save_assessment_pdf(page: Page, url: str, output_path: Path) -> None:
    """Navigate to an assessment URL and print the rendered page to a PDF file.

    Args:
        page: Playwright page used for the navigation and the PDF export.
        url: Address of the assessment to load.
        output_path: Destination file for the PDF.
    """
    settle_ms = 3_000
    pdf_options = {
        "path": str(output_path),
        "print_background": True,
        "format": "A4",
    }

    await page.goto(url, wait_until="domcontentloaded")
    await page.wait_for_load_state("networkidle", timeout=60_000)
    # Fixed extra pause after the network goes idle, before printing.
    await page.wait_for_timeout(settle_ms)
    await page.pdf(**pdf_options)
async def run(args: argparse.Namespace) -> int:
    """Connect to Chrome over CDP and save every listed assessment as a PDF.

    Creates the output directory, attaches to the running browser, waits for
    the Crowdmark session to be authenticated, then walks the active and
    archived course listings. Each assessment is written to
    ``<output_dir>/<sanitized title>.pdf`` unless that file already exists.

    Args:
        args: Parsed command-line options. Reads ``output_dir`` and
            ``ws_url``; when ``ws_url`` is falsy the endpoint comes from
            ``build_cdp_ws_url()``.

    Returns:
        ``0`` once every listing has been processed.
    """
    output_dir = Path(args.output_dir).expanduser().resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    async with async_playwright() as playwright:
        browser = await playwright.chromium.connect_over_cdp(args.ws_url or build_cdp_ws_url())
        # Browser.new_page() would create a fresh context without the user's
        # cookies. Open the tab in the browser's existing default context so
        # the already signed-in session is reused; only fall back to a new
        # context if the browser exposes none.
        context = browser.contexts[0] if browser.contexts else await browser.new_context()
        page = await context.new_page()
        try:
            await wait_for_authentication(page)
            for course_url in (ACTIVE_COURSES_URL, ARCHIVED_COURSES_URL):
                links = await scrape_course_links(page, course_url)
                for course in links:
                    assessments = await scrape_assessment_links(page, course.url)
                    for a in assessments:
                        # NOTE(review): the file name depends only on the
                        # assessment title, so equal titles in different
                        # courses map to the same file and the later one is
                        # skipped by the exists() check.
                        dest = output_dir / f"{sanitize(a.title)}.pdf"
                        if not dest.exists():
                            await save_assessment_pdf(page, a.url, dest)
        finally:
            # Do not leave the helper tab open in the user's Chrome.
            await page.close()
    return 0
if __name__ == "__main__":
    raise SystemExit(asyncio.run(run(parse_args())))

How to Use
1
Install the dependencies.
python3 -m pip install playwright tqdm
2
Open Chrome with remote debugging enabled.
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9222
3
Sign in to Crowdmark.
4
Run the downloader from this folder.
python3 crowdmark_downloader.py
5
Find your PDFs in crowdmark_assessment_pdfs.
FAQ
Chrome isn't accepting the WebSocket connection. What should I do?
Open chrome://inspect/#remote-debugging in Chrome and confirm remote debugging is enabled, then rerun the script.