Crowdmark Downloader

Download all your graded Crowdmark assessments as PDFs through an existing Chrome session.

Get the Script

crowdmark_downloader.py

#!/usr/bin/env python3
"""
Download Crowdmark student assessments as PDFs through an existing Chrome CDP session.

Chrome must already be running with remote debugging enabled. The script reuses the
existing browser context, waits for manual login when needed, and saves PDFs with:
{YYYY}{X}_{CourseCode}_{CourseName} - {AssessmentTitle}.pdf
"""

from __future__ import annotations

import argparse
import asyncio
import os
import re
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
from urllib.parse import urljoin, urlparse

from playwright.async_api import Page, TimeoutError as PlaywrightTimeoutError
from playwright.async_api import async_playwright

try:
    from tqdm import tqdm
except ImportError:
    tqdm = None


# Student-facing Crowdmark listing pages. run() visits both in this order, and
# wait_for_authentication() uses the active-courses page as its login probe.
ACTIVE_COURSES_URL = "https://app.crowdmark.com/student/courses"
ARCHIVED_COURSES_URL = "https://app.crowdmark.com/student/course-archive"
# Folder name for the saved PDFs. Presumably the default for the output-dir
# option built in parse_args() (not shown here) -- TODO confirm.
DEFAULT_OUTPUT_DIR = "crowdmark_assessment_pdfs"


async def wait_for_authentication(page: Page) -> None:
    """Open the active-courses page and wait until the session is signed in.

    If the navigation does not end on a sign-in/login URL, the function waits
    for the network to go idle (up to 30 s) and returns. Otherwise it asks the
    user to log in manually in the browser and waits up to ten minutes for the
    URL to leave the sign-in/login route, then waits for network idle again.

    Args:
        page: Playwright page used for the navigation and the waits.

    Raises:
        TimeoutError: If the manual login, or the network-idle wait right
            after it, times out. The Playwright timeout is attached as the
            exception's ``__cause__``.

    The initial navigation and the idle wait on the already-signed-in path are
    not wrapped, so any error they raise propagates unchanged.
    """
    await page.goto(ACTIVE_COURSES_URL, wait_until="domcontentloaded")
    if "/sign-in" not in page.url and "/login" not in page.url:
        # Not redirected to a sign-in/login route: treat the session as
        # authenticated and just let the page settle.
        await page.wait_for_load_state("networkidle", timeout=30_000)
        return
    print("Crowdmark sign-in is open. Please log in there; I will continue automatically.")
    try:
        await page.wait_for_url(
            lambda url: "/sign-in" not in url and "/login" not in url,
            timeout=10 * 60 * 1000,  # ten minutes for the user to log in
        )
        await page.wait_for_load_state("networkidle", timeout=60_000)
    except PlaywrightTimeoutError as exc:
        # Chain explicitly so the underlying Playwright timeout (which wait
        # expired, and on which URL) stays visible in the traceback.
        raise TimeoutError("Timed out waiting for Crowdmark login.") from exc


async def save_assessment_pdf(page: Page, url: str, output_path: Path) -> None:
    """Navigate ``page`` to ``url`` and print it as an A4 PDF at ``output_path``.

    The page is loaded, given up to 60 s to reach network idle, paused for a
    further 3 s, and then printed with background graphics enabled.
    """
    network_idle_timeout_ms = 60_000
    settle_delay_ms = 3_000

    await page.goto(url, wait_until="domcontentloaded")
    await page.wait_for_load_state("networkidle", timeout=network_idle_timeout_ms)
    # Fixed pause after network idle; presumably gives late client-side
    # rendering time to finish before printing.
    await page.wait_for_timeout(settle_delay_ms)

    pdf_options = {
        "path": str(output_path),
        "print_background": True,
        "format": "A4",
    }
    await page.pdf(**pdf_options)


async def run(args: argparse.Namespace) -> int:
    """Download every assessment reachable from the course listings as a PDF.

    Connects to an already-running Chrome over CDP, opens one working tab,
    waits for the user to be signed in, then walks the active and archived
    course listings. Each assessment whose target file does not exist yet is
    saved into the output directory.

    Args:
        args: Parsed command-line arguments. Reads ``args.output_dir`` and
            ``args.ws_url``; when ``ws_url`` is falsy the endpoint comes from
            ``build_cdp_ws_url()``.

    Returns:
        ``0`` when the walk completes. Errors propagate as exceptions.
    """
    output_dir = Path(args.output_dir).expanduser().resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    async with async_playwright() as playwright:
        browser = await playwright.chromium.connect_over_cdp(args.ws_url or build_cdp_ws_url())
        # browser.new_page() would open the tab in a brand-new browser context
        # without the user's cookies. Over CDP the browser's existing default
        # context is exposed as browser.contexts[0]; use it so an existing
        # Crowdmark login is reused. Fall back to a fresh context only if the
        # browser reports none.
        existing_contexts = browser.contexts
        context = existing_contexts[0] if existing_contexts else await browser.new_context()
        page = await context.new_page()
        try:
            await wait_for_authentication(page)

            for course_url in (ACTIVE_COURSES_URL, ARCHIVED_COURSES_URL):
                links = await scrape_course_links(page, course_url)
                for course in links:
                    assessments = await scrape_assessment_links(page, course.url)
                    for a in assessments:
                        # NOTE(review): the file name is built from the
                        # assessment title alone. If titles are not already
                        # unique across courses, a later assessment with the
                        # same title is skipped by the exists() check below --
                        # confirm against scrape_assessment_links().
                        dest = output_dir / f"{sanitize(a.title)}.pdf"
                        if not dest.exists():
                            await save_assessment_pdf(page, a.url, dest)
        finally:
            # Do not leave the working tab open in the user's Chrome, whether
            # the run finished or failed part-way.
            await page.close()
    return 0


if __name__ == "__main__":
    # Run the async entry point to completion and use its return value as the
    # process exit status.
    exit_code = asyncio.run(run(parse_args()))
    raise SystemExit(exit_code)

How to Use

Install the dependencies.

python3 -m pip install playwright tqdm

Open Chrome with remote debugging enabled. The command below uses the macOS install path; adjust it for other operating systems.

"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9222

Run the downloader from this folder.

python3 crowdmark_downloader.py

Find your PDFs in the crowdmark_assessment_pdfs folder.

FAQ

Chrome isn't accepting the WebSocket connection. What should I do?

Open chrome://inspect/#remote-debugging in Chrome and confirm remote debugging is enabled, then rerun the script.