Skip to content

AutoML+

Modules

FastAPI app init for the AutoML+ service.

Route definitions for the AutoML+ service.

analyze_web_accessibility_and_readability(file, url=None, extra_file_input=None) async

Run WCAG-inspired accessibility checks and optional readability analysis on HTML.

Source code in app/automlplus/router.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
@router.post("/web_access/analyze/")
async def analyze_web_accessibility_and_readability(
    file: Annotated[UploadFile, File(..., description="HTML file")],
    url: Annotated[str | None, Form(..., description="URL of website")] = None,
    extra_file_input: Annotated[
        UploadFile | None, File(..., description="Extra file for LLM context")
    ] = None,
) -> JSONResponse:
    """Run WCAG-inspired accessibility checks and optional readability analysis on HTML.

    The uploaded ``file`` is read first; when ``url`` is also given, the
    fetched page replaces the uploaded content (URL takes precedence).
    Returns per-chunk pipeline results, an average score, and readability
    metrics; responds 400 on URL fetch failure or empty resolved content.
    """
    import asyncio  # local import: only used to off-load the blocking HTTP fetch

    logger.info("Starting web accessibility and readability analysis")

    content: str | None = None
    source_name: str = "uploaded.html"
    # os.getenv values are strings; keep the default a string and convert once.
    timeout: int = int(os.getenv("WEB_ACCESSIBILITY_URL_RETRY_TIMEOUT", "10"))

    # --- Load HTML content ---
    if file:
        try:
            content = (await file.read()).decode("utf-8", errors="replace")
            source_name = file.filename or source_name
            logger.debug(f"HTML file '{source_name}' successfully loaded")
        finally:
            # Always release the spooled temp file, even if read/decode failed.
            try:
                await file.close()
            except Exception:
                logger.warning("Failed to close uploaded HTML file", exc_info=True)

    if url:
        try:
            logger.debug(f"Fetching HTML from URL: {url}")
            # requests is synchronous: run it in a worker thread so the
            # blocking network call cannot stall the event loop.
            resp = await asyncio.to_thread(requests.get, url, timeout=timeout)
            resp.raise_for_status()
            content = resp.text
            source_name = url
            logger.debug("HTML successfully fetched from URL")
        except Exception as e:
            logger.error(f"Failed to fetch HTML from URL: {e}")
            return JSONResponse(
                content={"error": f"Failed to fetch URL: {e}"}, status_code=400
            )

    if not content or not str(content).strip():
        logger.error("Resolved HTML content is empty")
        return JSONResponse(
            content={"error": "Resolved content is empty"}, status_code=400
        )

    content_str: str = str(content)

    # --- Load guidelines file if provided ---
    context_str: str = ""
    if extra_file_input is not None:
        try:
            logger.debug("Reading extra context file for accessibility analysis")
            guidelines_bytes = await extra_file_input.read()
            guidelines_text = guidelines_bytes.decode("utf-8", errors="replace")
            context_str = f"Accessibility guidelines to follow (user-provided):\n\n{guidelines_text}"
            logger.debug("Extra context file successfully loaded")
        finally:
            try:
                await extra_file_input.close()
            except Exception:
                logger.warning("Failed to close extra context file", exc_info=True)

    # --- Run accessibility pipeline ---
    chunk_size: int = int(os.getenv("CHUNK_SIZE_FOR_ACCESSIBILITY", "3000"))
    concurrency_num: int = int(os.getenv("CONCURRENCY_NUM_FOR_ACCESSIBILITY", "4"))
    logger.debug(
        f"Running accessibility pipeline with chunk size {chunk_size}, concurrency {concurrency_num}"
    )

    results = await run_accessibility_pipeline(
        content=content_str,
        filename=source_name,
        jinja_environment=jinja_environment,
        chunk_size=chunk_size,
        concurrency=concurrency_num,
        context=context_str,
    )
    logger.info("Accessibility pipeline completed successfully")

    # --- Aggregate results ---
    # Results may contain nested coroutine values; resolve before serialising.
    resolved_results = [await resolve_coroutines(r) for r in results]

    # Average only over chunks that produced a numeric score.
    scores = [
        r.get("score")
        for r in resolved_results
        if isinstance(r.get("score"), (int, float))
    ]
    average_score: float | None = (sum(scores) / len(scores)) if scores else None
    logger.debug(f"Computed average accessibility score: {average_score}")

    # --- Readability analysis ---
    # Best-effort: failures are reported inside the payload, not raised.
    readability_scores: dict[str, Any] | None = None
    try:
        text = extract_text_from_html_bytes(content_str.encode("utf-8"))
        if text.strip():
            readability_scores = ReadabilityAnalyzer.analyze(text)
            logger.debug("Readability analysis completed successfully")
    except Exception as e:
        logger.warning(f"Error during readability analysis: {e}")
        readability_scores = {"error": str(e)}

    payload = {
        "source": source_name,
        "average_score": average_score,
        "results": resolved_results,
        "readability": readability_scores,
    }

    safe_payload = json_safe(payload)
    logger.info("Web accessibility and readability analysis finished successfully")
    return JSONResponse(content=safe_payload)

check_alt_text(image_url=Form(...), alt_text=Form(...)) async

Evaluate provided alt text against the referenced image using an LLM.

Source code in app/automlplus/router.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
@router.post("/web_access/check-alt-text/")
async def check_alt_text(
    image_url: str = Form(...),
    alt_text: str = Form(...),
) -> JSONResponse:
    """Evaluate provided alt text against the referenced image using an LLM."""
    logger.info(f"Checking alt text for image URL: {image_url}")
    try:
        evaluation: dict[str, Any] = AltTextChecker.check(
            jinja_environment, image_url, alt_text
        )
        logger.info("Alt-text evaluation completed successfully")

        # Echo the inputs next to the evaluation so the caller can correlate.
        payload = {
            "src": image_url,
            "alt_text": alt_text,
            "evaluation": evaluation,
        }
        return JSONResponse(content=json_safe(payload))
    except Exception as e:
        logger.exception("Error during alt-text check: %s", e)
        return JSONResponse(content={"error": str(e)}, status_code=500)

image_to_website(image_file=File(default=None)) async

Convert an uploaded image into a basic HTML website structure.

Source code in app/automlplus/router.py
31
32
33
34
35
36
37
38
39
40
41
42
43
@router.post("/image_tools/image_to_website/")
async def image_to_website(
    image_file: UploadFile | None = File(default=None),
) -> JSONResponse:
    """Convert an uploaded image into a basic HTML website structure."""
    logger.info("Starting image-to-website conversion")
    try:
        # TODO: Implement image-to-website logic (currently placeholder)
        logger.info("Image-to-website conversion completed successfully")
        response = JSONResponse(content={})
    except Exception as exc:
        logger.exception("Error during image-to-website conversion: %s", exc)
        response = JSONResponse(content={"error": str(exc)}, status_code=500)
    return response

run_on_image(prompt=Form(...), model=Form(default=None), image_file=File(default=None), image_url=Form(default=None)) async

Run a vision-language model on an image and return the text output.

Source code in app/automlplus/router.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
@router.post("/image_tools/run_on_image/")
async def run_on_image(
    prompt: str = Form(...),
    model: str | None = Form(default=None),
    image_file: UploadFile | None = File(default=None),
    image_url: str | None = Form(default=None),
) -> JSONResponse:
    """Run a vision-language model on an image and return the text output.

    One image source is required: an uploaded file or a URL. Responds 400
    when neither is supplied, 500 on any processing failure.
    """
    logger.info("Running model on image with prompt: %s", prompt)

    if image_file is None and not image_url:
        logger.error("Missing both image_file and image_url")
        return JSONResponse(
            {"error": "Provide image_file or image_url"}, status_code=400
        )

    try:
        image_bytes: bytes | None = None
        if image_file is not None:
            try:
                image_bytes = await image_file.read()
            finally:
                # Close in a finally block so a failed read() cannot leak
                # the underlying spooled temporary file (the original only
                # closed on the success path).
                try:
                    await image_file.close()
                except Exception:
                    logger.warning("Failed to close image file", exc_info=True)
            logger.debug("Image file successfully read and closed")

        result = ImagePromptRunner.run(
            image_bytes=image_bytes,
            image_path_or_url=image_url,
            prompt=prompt,
            model=model,
            jinja_environment=jinja_environment,
        )

        safe_result = json_safe({"response": result})
        logger.info("Image prompt run completed successfully")
        return JSONResponse(content=safe_result)
    except Exception as e:
        logger.exception("Error during image prompt run: %s", e)
        return JSONResponse({"error": str(e)}, status_code=500)

run_on_image_stream(prompt='', model=None, image_file=None, image_url=None) async

Stream a vision-language model's output on an image and prompt.

Source code in app/automlplus/router.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
@router.post(
    "/image_tools/run_on_image_stream/",
    response_model=None,
)
async def run_on_image_stream(
    prompt: Annotated[str, Form(..., description="Prompt to apply on the image")] = "",
    model: Annotated[
        str | None, Form(..., description="Model to apply on the image")
    ] = None,
    image_file: Annotated[
        UploadFile | None, File(..., description="Image file if not a URL")
    ] = None,
    image_url: Annotated[
        str | None, Form(..., description="Image URL if not a file but an URL")
    ] = None,
) -> Response:
    """Stream a vision-language model's output on an image and prompt."""
    logger.info("Streaming model output for image prompt: %s", prompt)

    # Guard clause: at least one image source must be supplied.
    if image_file is None and not image_url:
        logger.error("No image or URL provided for streaming run")
        return JSONResponse(
            content={"error": "Provide image_file or image_url"}, status_code=400
        )

    try:
        payload: bytes | None = None
        if image_file is not None:
            try:
                payload = await image_file.read()
                logger.debug("Image file successfully read for streaming")
            finally:
                try:
                    await image_file.close()
                except Exception:
                    logger.warning("Failed to properly close image file", exc_info=True)

        def chunk_source():
            # Lazily relay chunks from the model runner to the response.
            logger.debug("Starting stream generator for image model run")
            yield from ImagePromptRunner.run_stream(
                image_bytes=payload,
                image_path_or_url=image_url,
                prompt=prompt,
                model=model,
                jinja_environment=jinja_environment,
            )

        logger.info("Image stream initiated successfully")
        return StreamingResponse(chunk_source(), media_type="text/plain")

    except Exception as e:
        logger.exception("Error during image prompt streaming run: %s", e)
        return JSONResponse(content={"error": str(e)}, status_code=500)

ImageConverter

Convert images to base64 from local paths or URLs.

Source code in app/automlplus/utils.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
class ImageConverter:
    """Convert images to base64 from local paths or URLs."""

    # Cap remote fetches so a stalled server cannot hang a request forever;
    # override with the IMAGE_FETCH_TIMEOUT environment variable (seconds).
    FETCH_TIMEOUT: float = float(os.getenv("IMAGE_FETCH_TIMEOUT", "30"))

    @staticmethod
    def to_base64(image_path_or_url: str) -> str:
        """Return the image at *image_path_or_url* (local path or http(s) URL) as a base64 PNG string.

        Raises:
            ValueError: the URL response is not an image.
            FileNotFoundError: the local path does not exist.
            Exception: any network/HTTP or decoding failure is re-raised after logging.
        """
        logger.info("Converting image to base64: %s", image_path_or_url)
        try:
            if image_path_or_url.startswith("http"):
                headers = {"User-Agent": "Mozilla/5.0 (compatible; ImageConverter/1.0)"}
                # timeout added: the original call had none and could block indefinitely.
                resp = requests.get(
                    image_path_or_url,
                    headers=headers,
                    timeout=ImageConverter.FETCH_TIMEOUT,
                )
                resp.raise_for_status()
                if "image" not in resp.headers.get("Content-Type", ""):
                    raise ValueError(
                        f"URL does not point to an image: {image_path_or_url}"
                    )
                image = Image.open(BytesIO(resp.content))
            else:
                if not os.path.isfile(image_path_or_url):
                    raise FileNotFoundError(f"No such file: {image_path_or_url}")
                image = Image.open(image_path_or_url)

            # Normalize to RGBA so palette/alpha images all encode uniformly as PNG.
            image = image.convert("RGBA")
            buffer = BytesIO()
            image.save(buffer, format="PNG")
            return base64.b64encode(buffer.getvalue()).decode("utf-8")
        except Exception:
            logger.exception("Image conversion failed")
            raise  # bare raise preserves the original traceback

    @staticmethod
    def bytes_to_base64(image_bytes: bytes) -> str:
        """Convert raw image bytes to base64 PNG string."""
        try:
            image = Image.open(BytesIO(image_bytes))
            buffer = BytesIO()
            image.save(buffer, format="PNG")
            return base64.b64encode(buffer.getvalue()).decode("utf-8")
        except Exception:
            logger.exception("Image bytes conversion failed")
            raise

bytes_to_base64(image_bytes) staticmethod

Convert raw image bytes to base64 PNG string.

Source code in app/automlplus/utils.py
43
44
45
46
47
48
49
50
51
52
53
@staticmethod
def bytes_to_base64(image_bytes: bytes) -> str:
    """Convert raw image bytes to base64 PNG string."""
    try:
        # Decode whatever format the bytes are in, then re-encode as PNG.
        image = Image.open(BytesIO(image_bytes))
        buffer = BytesIO()
        image.save(buffer, format="PNG")
        return base64.b64encode(buffer.getvalue()).decode("utf-8")
    except Exception as e:
        # Log with traceback, then propagate to the caller.
        logger.exception("Image bytes conversion failed")
        raise e

extract_text_from_html_bytes(content)

Extract readable text from raw HTML bytes.

Source code in app/automlplus/utils.py
56
57
58
59
60
61
62
63
64
def extract_text_from_html_bytes(content: bytes) -> str:
    """Extract readable text from raw HTML bytes."""
    soup = BeautifulSoup(content, features="html.parser")
    # Remove non-visible content before pulling text.
    for tag in soup(["script", "style"]):
        tag.extract()
    raw_lines = [ln.strip() for ln in soup.get_text().splitlines()]
    # Double spaces are treated as phrase separators within a line.
    pieces = [seg.strip() for ln in raw_lines for seg in ln.split("  ")]
    return "\n".join(piece for piece in pieces if piece)

json_safe(data)

Recursively convert string values to JSON-safe strings.

Source code in app/automlplus/utils.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def json_safe(data: Any) -> Any:
    """Recursively convert string values to JSON-safe strings.

    NOTE(review): ``json.dumps`` (and hence JSONResponse) already escapes
    these characters, so applying this first double-escapes strings in the
    output. Behavior is preserved here since API consumers may depend on
    the current format — confirm before changing.
    """
    if isinstance(data, str):
        escaped = data.replace("\\", "\\\\")
        escaped = escaped.replace('"', '\\"')
        escaped = escaped.replace("\n", "\\n")
        return escaped.replace("\r", "\\r")
    if isinstance(data, dict):
        return {key: json_safe(value) for key, value in data.items()}
    if isinstance(data, list):
        return [json_safe(item) for item in data]
    return data

Tools

Static analysis tools for AutoML+.

Static tools derive insights from content using deterministic, rule-based libraries — no LLM calls are made. They are fast, reproducible, and require no API credentials.

Current tools:

  • ReadabilityAnalyzer — computes textstat readability metrics (Flesch Reading Ease, word counts, sentence length, etc.) over a plain-text string.
  • split_chunks — splits an HTML/text string into fixed-size character chunks while tracking the original 1-based line ranges for each chunk.

ReadabilityAnalyzer

Compute readability metrics for a piece of text.

Source code in app/automlplus/tools/static.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class ReadabilityAnalyzer:
    """Compute readability metrics for a piece of text."""

    # Display label -> textstat metric callable.
    METRICS = {
        "Flesch Reading Ease": textstat.flesch_reading_ease,
        "Difficult Words": textstat.difficult_words,
        "Lexicon Count": textstat.lexicon_count,
        "Avg Sentence Length": textstat.words_per_sentence,
    }

    @staticmethod
    def apply_metric(metric, text: str) -> Any:
        """Apply one metric callable, normalising failures and odd values."""
        try:
            result = metric(text)
            # Non-finite floats (NaN/inf) are not JSON-representable.
            if isinstance(result, float) and not isfinite(result):
                return None
            if isinstance(result, (int, float, str)):
                return result
            return str(result)
        except Exception:
            logger.warning("Metric failed: %s", metric.__name__)
            return "N/A"

    @classmethod
    def analyze(cls, text: str) -> Dict[str, Any]:
        """Run every registered metric over *text* and return label -> value."""
        logger.info("Running readability metrics")
        scores: Dict[str, Any] = {}
        for label, fn in cls.METRICS.items():
            scores[label] = cls.apply_metric(fn, text)
        return scores

split_chunks(content, chunk_size)

Split content into fixed-size character chunks and return 1-based (start_line, end_line) ranges for each chunk.

Line ranges are accurate even when chunks start/end mid-line.

Source code in app/automlplus/tools/static.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def split_chunks(
    content: str, chunk_size: int
) -> Tuple[List[str], List[Tuple[int, int]]]:
    """
    Split content into fixed-size character chunks and return
    1-based (start_line, end_line) ranges for each chunk.

    Line ranges are accurate even when chunks start/end mid-line.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be > 0")

    # offsets[k] = character offset at which line k begins; the final entry
    # is the total length, so bisecting maps any offset to a line index.
    offsets = [0]
    running = 0
    for raw_line in content.splitlines(keepends=True):
        running += len(raw_line)
        offsets.append(running)

    pieces: List[str] = []
    spans: List[Tuple[int, int]] = []

    total = len(content)
    for begin in range(0, total, chunk_size):
        stop = min(begin + chunk_size, total)
        pieces.append(content[begin:stop])

        # bisect_right handles a chunk starting mid-line; bisect_left keeps
        # an exact line-boundary end from bleeding into the next line.
        first = bisect.bisect_right(offsets, begin) - 1
        last = bisect.bisect_left(offsets, stop) - 1
        spans.append((first + 1, max(first + 1, last + 1)))

    return pieces, spans

LLM-over-text tools for AutoML+.

Text tools send plain-text content (HTML chunks, documents, etc.) to a language model and parse the structured response. Unlike VLM tools, no image input is required; unlike static tools, they rely on an external LLM API.

Current tools:

  • ChunkResult — dataclass holding the outcome (score, image feedback, LLM response, or error) for a single processed text chunk.
  • _process_single_chunk — sends one HTML chunk to the LLM for WCAG analysis, extracts a numeric score from the response, and runs AltTextChecker on any <img> tags found in the chunk.

ChunkResult dataclass

Result for processing a single chunk of an HTML file.

Source code in app/automlplus/tools/text.py
30
31
32
33
34
35
36
37
38
39
40
@dataclass
class ChunkResult:
    """Result for processing a single chunk of an HTML file."""

    chunk: int  # index of this chunk within the document (from the pipeline's enumerate)
    start_line: int  # 1-based first source line covered by the chunk
    end_line: int  # 1-based last source line covered by the chunk
    score: float | None  # numeric accessibility score for the chunk, None when unavailable
    image_feedback: List[Dict[str, Any]]  # per-image alt-text evaluation results
    llm_response: str | None  # raw LLM response text, None when the call failed
    error: str | None = None  # error message when processing this chunk failed

VLM (Vision Language Model) tools for AutoML+.

A VLM task involves passing one or more images together with a text prompt to a multimodal language model and processing its response. The classes here cover two use-cases:

  • ImagePromptRunner — general-purpose: run or stream any user-supplied prompt over an image (file upload or URL).
  • AltTextChecker — specialised: evaluate whether provided alt text accurately describes an image, using a structured VLM prompt defined in Jinja2 templates.

AltTextChecker

Check whether provided alt text matches an image using a VLM.

Source code in app/automlplus/tools/vlm.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
class AltTextChecker:
    """Check whether provided alt text matches an image using a VLM."""

    DEFAULT_MODEL = "gpt-4o-mini"

    @staticmethod
    def _resolve_model(model: str) -> str:
        """Return a valid model string, normalizing common aliases and falling back to default."""
        if not model or model.strip() == "":
            logger.error(
                "Model parameter is empty or None, using default '%s'",
                AltTextChecker.DEFAULT_MODEL,
            )
            return AltTextChecker.DEFAULT_MODEL

        candidate = model.strip()
        lower = candidate.lower().replace(" ", "")

        # Normalize known typo/alias spellings of the default model.
        if lower in {"gpt40-mini", "gpt4o-mini"}:
            return "gpt-4o-mini"

        return lower

    @staticmethod
    def _build_messages(
        jinja_environment: Environment, image_b64: str, alt_text: str
    ) -> list[dict]:
        """Construct the message payload for the VLM call."""
        return [
            {
                "role": "system",
                "content": render_template(
                    jinja_environment, "wcag_checker_default_prompt.txt"
                ),
            },
            {"role": "user", "content": f"Alt text: {alt_text}"},
            {
                "role": "user",
                "content": render_template(
                    jinja_environment, "image_alt_checker_prompt.txt"
                ),
                "images": [image_b64],
            },
        ]

    @staticmethod
    def _redact_messages_for_log(messages: list[dict]) -> list[dict]:
        """Return a copy of messages with any base64 image payloads redacted for logging."""
        redacted: list[dict] = []
        for message in messages:
            msg_copy = {k: v for k, v in message.items() if k != "images"}
            if "images" in message:
                safe_images = []
                for img in message["images"]:
                    length_hint = len(img) if isinstance(img, str) else None
                    safe_images.append(
                        f"<redacted_base64 length={length_hint}>"
                        if length_hint is not None
                        else "<redacted_base64>"
                    )
                msg_copy["images"] = safe_images
            redacted.append(msg_copy)
        return redacted

    @staticmethod
    def check(
        jinja_environment: Environment,
        image_url_or_path: str,
        alt_text: str,
        model: str | None = None,
    ) -> str:
        """Evaluate *alt_text* against the image at *image_url_or_path* via the VLM.

        ``model=None`` (the default) resolves the model from the
        ``ALT_TEXT_CHECKER_MODEL`` environment variable at call time.
        The previous signature evaluated ``os.getenv`` in the default
        argument, freezing the value when the class body first executed,
        so later environment changes were silently ignored.

        Raises whatever the image conversion or chat call raises, after
        logging the (redacted) request for diagnosis.
        """
        if model is None:
            model = os.getenv("ALT_TEXT_CHECKER_MODEL", AltTextChecker.DEFAULT_MODEL)
        logger.info("Checking alt-text using model %s", model)
        model = AltTextChecker._resolve_model(model)

        # Pre-bind so the except path can safely log even when the failure
        # happens before the messages are built.
        messages: list[dict] = []
        try:
            image_b64 = ImageConverter.to_base64(image_url_or_path)

            messages = AltTextChecker._build_messages(
                jinja_environment=jinja_environment,
                image_b64=image_b64,
                alt_text=alt_text,
            )

            logger.info("Sending request with model: %s", model)
            logger.info(
                "Messages structure (redacted): %s",
                AltTextChecker._redact_messages_for_log(messages),
            )

            response_content = ChatHandler.chat_sync_messages(
                messages=messages,
                model=model,
            )

            return response_content

        except Exception as e:
            logger.exception("AltTextChecker failed with error: %s", str(e))
            logger.error("Model used: %s", model)
            try:
                logger.error(
                    "Messages sent (redacted): %s",
                    AltTextChecker._redact_messages_for_log(messages),
                )
            except Exception:
                logger.error("Messages sent (redaction_failed)")
            raise

ImagePromptRunner

Run a VLM on an image and user-provided prompt.

Source code in app/automlplus/tools/vlm.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
class ImagePromptRunner:
    """Run a VLM on an image and user-provided prompt."""

    # Read once at import; override with the IMAGE_PROMPT_MODEL env var.
    DEFAULT_MODEL: str = os.getenv("IMAGE_PROMPT_MODEL", "gpt-4o-mini")

    @staticmethod
    def _resolve_model(model: str | None) -> str:
        """Return *model* when non-blank, otherwise the configured default."""
        if not model or not str(model).strip():
            return ImagePromptRunner.DEFAULT_MODEL
        return model

    @staticmethod
    def build_messages(
        jinja_environment: Environment | None, image_b64: str, prompt: str
    ) -> list[dict[str, str | list[str] | list[None]]]:
        """Assemble the chat payload: optional templated system prompt plus the user prompt with image."""
        messages: list[dict[str, str | list[str] | list[None]]] = []
        if jinja_environment is not None:
            # Template failures propagate to the caller; there is no fallback
            # prompt (the original wrapped this in a pointless try/re-raise).
            system_prompt = render_template(
                jinja_environment, "image_to_website_prompt.txt"
            )
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt, "images": [image_b64]})
        return messages

    @staticmethod
    def run(
        image_bytes: bytes | None = None,
        image_path_or_url: str | None = None,
        prompt: str = "",
        model: str | None = None,
        jinja_environment: Environment | None = None,
    ) -> str:
        """Run the VLM once and return the full text response.

        Raises:
            ValueError: when neither image_bytes nor image_path_or_url is given.
        """
        model_name = ImagePromptRunner._resolve_model(model)
        try:
            if image_bytes is None and not image_path_or_url:
                raise ValueError("Provide either image_bytes or image_path_or_url")

            # Raw bytes take precedence over a path/URL when both are given.
            image_b64 = (
                ImageConverter.bytes_to_base64(image_bytes)
                if image_bytes is not None
                else ImageConverter.to_base64(str(image_path_or_url))
            )

            messages = ImagePromptRunner.build_messages(
                jinja_environment, image_b64, prompt
            )

            return ChatHandler.chat_sync_messages(messages=messages, model=model_name)
        except Exception:
            logger.exception("ImagePromptRunner failed")
            raise  # bare raise preserves the original traceback

    @staticmethod
    def run_stream(
        image_bytes: bytes | None = None,
        image_path_or_url: str | None = None,
        prompt: str = "",
        model: str | None = None,
        jinja_environment: Environment | None = None,
    ) -> "Iterator[str]":  # forward-ref: the return is a chunk iterator, not str — TODO import Iterator
        """Stream VLM output for an image+prompt interaction. Yields incremental text chunks."""
        model_name = ImagePromptRunner._resolve_model(model)
        if image_bytes is None and not image_path_or_url:
            raise ValueError("Provide either image_bytes or image_path_or_url")

        image_b64 = (
            ImageConverter.bytes_to_base64(image_bytes)
            if image_bytes is not None
            else ImageConverter.to_base64(str(image_path_or_url))
        )
        messages = ImagePromptRunner.build_messages(
            jinja_environment, image_b64, prompt
        )
        return ChatHandler.chat_stream_messages_sync(
            messages=messages, model=model_name
        )

run_stream(image_bytes=None, image_path_or_url=None, prompt='', model=None, jinja_environment=None) staticmethod

Stream VLM output for an image+prompt interaction. Yields incremental text chunks.

Source code in app/automlplus/tools/vlm.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
@staticmethod
def run_stream(
    image_bytes: bytes | None = None,
    image_path_or_url: str | None = None,
    prompt: str = "",
    model: str | None = None,
    jinja_environment: Environment | None = None,
) -> "Iterator[str]":  # NOTE(review): annotation was `str`, but callers iterate the return — confirm ChatHandler's stream type
    """Stream VLM output for an image+prompt interaction. Yields incremental text chunks."""
    model_name = ImagePromptRunner._resolve_model(model)
    if image_bytes is None and not image_path_or_url:
        raise ValueError("Provide either image_bytes or image_path_or_url")

    # Raw bytes take precedence over a path/URL when both are given.
    image_b64 = (
        ImageConverter.bytes_to_base64(image_bytes)
        if image_bytes is not None
        else ImageConverter.to_base64(str(image_path_or_url))
    )
    messages = ImagePromptRunner.build_messages(
        jinja_environment, image_b64, prompt
    )
    return ChatHandler.chat_stream_messages_sync(
        messages=messages, model=model_name
    )

Website Accessibility

Orchestration pipeline for web accessibility analysis.

This module coordinates the full accessibility analysis workflow: it splits an HTML document into chunks, fans out concurrent LLM-over-text analysis via _process_single_chunk, and aggregates results. It is intentionally thin — all tool logic lives in app.automlplus.tools.

  • run_accessibility_pipeline — main entry point; returns a list of ChunkResult objects, one per chunk.
  • resolve_coroutines — utility to recursively await coroutine-valued attributes when serialising results.
  • stream_accessibility_results — streams the resolved results as a single JSON array (used for streaming response endpoints).

resolve_coroutines(obj) async

Recursively await any coroutine attributes in an object.

Source code in app/automlplus/website_accessibility/pipeline.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
async def resolve_coroutines(obj: Any) -> Any:
    """Recursively await any coroutine attributes in an object."""
    if asyncio.iscoroutine(obj):
        return await obj
    if isinstance(obj, dict):
        return {key: await resolve_coroutines(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [await resolve_coroutines(entry) for entry in obj]
    if hasattr(obj, "__dict__"):
        # Arbitrary objects (e.g. dataclass instances) are flattened
        # into plain dicts of their resolved attributes.
        return {
            attr: await resolve_coroutines(value) for attr, value in vars(obj).items()
        }
    return obj

run_accessibility_pipeline(content, filename, jinja_environment, chunk_size, concurrency=4, context='') async

Split HTML into chunks and process them concurrently with a semaphore.

Source code in app/automlplus/website_accessibility/pipeline.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
async def run_accessibility_pipeline(
    content: str,
    filename: str,
    jinja_environment,
    chunk_size: int,
    concurrency: int = 4,
    context: str = "",
) -> List[ChunkResult]:
    """Split HTML into chunks and process them concurrently with a semaphore."""
    pieces, spans = split_chunks(content, chunk_size)
    logger.info("Processing the website in %d chunks", len(pieces))
    # The semaphore bounds how many chunk analyses run at once.
    gate = asyncio.Semaphore(concurrency)
    total = len(pieces)
    pending = []
    for idx, (piece, (first, last)) in enumerate(zip(pieces, spans)):
        pending.append(
            _process_single_chunk(
                idx, piece, first, last, total, filename, jinja_environment, gate, context
            )
        )
    results: List[ChunkResult] = await asyncio.gather(*pending)
    return results

stream_accessibility_results(results) async

Stream results as a single JSON array instead of JSONL.

Source code in app/automlplus/website_accessibility/pipeline.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
async def stream_accessibility_results(results):
    """Stream results as a single JSON array instead of JSONL."""
    collected = []
    for entry in results:
        # Top-level coroutines are awaited first; a failure here records
        # an error object and moves on to the next item.
        if asyncio.iscoroutine(entry):
            try:
                entry = await entry
            except Exception as e:
                collected.append({"error": str(e)})
                continue

        # Nested coroutine values are resolved separately, with their own
        # error placeholder on failure.
        try:
            resolved_entry = await resolve_coroutines(entry)
        except Exception as e:
            resolved_entry = {"error": f"Failed to resolve item: {e}"}

        collected.append(resolved_entry)

    yield json.dumps(collected, indent=2).encode("utf-8")