Em dash investigation code

get_results.py

import json
import os
import sys
import time
import urllib.request
import urllib.error
from datetime import datetime, timezone

# OpenRouter chat-completions endpoint that every request in this script is sent to.
API_URL = "https://openrouter.ai/api/v1/chat/completions"

# Model identifiers in "provider/model" form; each one is queried with every
# prompt in PROMPTS, RUNS_PER_PROMPT times.
MODELS = [
    "anthropic/claude-sonnet-4.6",
    "anthropic/claude-opus-4.7",
    "openai/gpt-4o",
    "openai/gpt-4.1",
    "openai/o3",
    "google/gemini-2.5-pro",
    "google/gemini-2.5-flash",
    "deepseek/deepseek-chat",
    "deepseek/deepseek-r1",
    "meta-llama/llama-4-maverick",
    "mistralai/mistral-large",
    "qwen/qwen3-235b-a22b",
    "x-ai/grok-4.20",
    "cohere/command-a",
    "openai/gpt-5",
    "openai/gpt-5.5",
    "openai/gpt-5.4-mini",
]

# Writing prompts sent verbatim as the single user message. Saved records refer
# to a prompt by its zero-based position in this list ("prompt_index"), so
# reordering or inserting entries changes which saved records count as done
# when resuming from an existing output file.
PROMPTS = [
    "Write a short LinkedIn post with a title about a career lesson you learned the hard way.",
    "Write a reddit post with a title reviewing a novel you loved but that most people overlook.",
    "Write a blog post with a title about why a hobby you love is misunderstood by most people.",
    "Write a reddit post with a title telling the story of a project that seemed doomed but turned out well in the end.",
    "Write a blog post with a title reflecting on how your relationship with technology has changed over the past decade.",
]

# Number of completions requested for each (model, prompt) pair; runs are numbered from 1.
RUNS_PER_PROMPT = 3
# JSONL file that results are appended to and that is re-read on startup to skip finished work.
OUTPUT_FILE = "results_v2.jsonl"
# Total number of attempts per API call (the first try counts as one).
MAX_RETRIES = 3


def load_completed(path):
    """Return the keys of all results already stored in the JSONL file *path*.

    Each usable line contributes one ``(model, prompt_index, run)`` tuple.
    Blank lines and lines that cannot be used as a result record are skipped,
    so a partially written or hand-edited file never prevents a resume.

    Args:
        path: Location of the JSONL results file; it does not have to exist.

    Returns:
        A set of ``(model, prompt_index, run)`` tuples; empty when the file
        is missing.
    """
    completed = set()
    if not os.path.exists(path):
        return completed
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
                completed.add((record["model"], record["prompt_index"], record["run"]))
            except (json.JSONDecodeError, KeyError, TypeError):
                # JSONDecodeError: truncated or garbage line.
                # KeyError: a JSON object that lacks one of the key fields.
                # TypeError: valid JSON that is not an object (list, null,
                # number, ...) or whose key fields are unhashable.
                continue
    return completed


def call_openrouter(api_key, model, prompt):
    """Send *prompt* to *model* through the OpenRouter API and return the parsed reply.

    The request is attempted up to ``MAX_RETRIES`` times with exponential
    backoff (2s, 4s, ...). Only failures that can plausibly succeed on a
    later attempt are retried: network errors, timeouts, HTTP 408/429 and
    HTTP 5xx. Other HTTP errors (bad request, bad credentials, unknown
    model, ...) are raised immediately because resending the identical
    request cannot change the outcome.

    Args:
        api_key: OpenRouter API key, sent as a bearer token.
        model: Model identifier placed in the request's "model" field.
        prompt: Text sent as the single user message.

    Returns:
        The decoded JSON response body.

    Raises:
        urllib.error.HTTPError: The server answered with an error status that
            is not retryable, or a retryable one on the final attempt.
        urllib.error.URLError: The connection failed on the final attempt.
        TimeoutError: The request timed out on the final attempt.
        json.JSONDecodeError: The response body was not valid JSON.
    """
    body = json.dumps({
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
    }).encode()

    req = urllib.request.Request(
        API_URL,
        data=body,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
    )

    for attempt in range(MAX_RETRIES):
        try:
            with urllib.request.urlopen(req, timeout=120) as resp:
                return json.loads(resp.read().decode())
        # HTTPError is a subclass of URLError, so it is covered here as well.
        except (urllib.error.URLError, TimeoutError) as e:
            if isinstance(e, urllib.error.HTTPError):
                # Request timeout, rate limiting and server-side errors are
                # transient; any other status is a permanent rejection.
                retryable = e.code in (408, 429) or e.code >= 500
            else:
                retryable = True
            if not retryable or attempt == MAX_RETRIES - 1:
                raise
            wait = 2 ** (attempt + 1)
            print(f"  Retry {attempt + 1}/{MAX_RETRIES} after {wait}s: {e}")
            time.sleep(wait)


def main():
    """Collect one completion per (model, prompt, run) and append each to OUTPUT_FILE.

    Reads the API key from the ``OPENROUTER_API_KEY`` environment variable and
    exits with status 1 when it is missing. Combinations already present in
    OUTPUT_FILE are skipped, so an interrupted run can simply be restarted.
    A request that fails, or whose response carries no choices, is reported on
    stderr and NOT written to the file, which leaves it eligible for the next
    run instead of being recorded as an empty completion.
    """
    api_key = os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        print("Error: OPENROUTER_API_KEY environment variable not set", file=sys.stderr)
        sys.exit(1)

    completed = load_completed(OUTPUT_FILE)
    total = len(MODELS) * len(PROMPTS) * RUNS_PER_PROMPT
    skipped = len(completed)
    if skipped:
        print(f"Resuming: {skipped}/{total} already completed")

    counter = 0
    for model in MODELS:
        for prompt_index, prompt in enumerate(PROMPTS):
            for run in range(1, RUNS_PER_PROMPT + 1):
                # Count skipped items too so the progress index is stable across resumes.
                counter += 1
                if (model, prompt_index, run) in completed:
                    continue

                print(f"[{counter}/{total}] {model} prompt={prompt_index} run={run}")

                try:
                    data = call_openrouter(api_key, model, prompt)
                except Exception as e:
                    # Best effort: one failing request must not abort the whole sweep.
                    print(f"  FAILED: {e}", file=sys.stderr)
                    continue

                # A body without a non-empty "choices" list is not a completion.
                # Saving it would mark the item as done and hide it from
                # future resumes, so treat it as a failure instead.
                choices = data.get("choices") if isinstance(data, dict) else None
                if not choices:
                    detail = data.get("error") if isinstance(data, dict) else data
                    print(f"  FAILED: response has no choices: {detail}", file=sys.stderr)
                    time.sleep(2)
                    continue

                choice = choices[0]
                message = choice.get("message") or {}
                response_text = message.get("content", "")
                usage = data.get("usage", {})

                record = {
                    "model": model,
                    "prompt_index": prompt_index,
                    "prompt": prompt,
                    "run": run,
                    "response": response_text,
                    "timestamp": datetime.now(timezone.utc).isoformat(),
                    "usage": usage,
                }

                # Append and close per record so an interruption loses at most one result.
                with open(OUTPUT_FILE, "a", encoding="utf-8") as f:
                    f.write(json.dumps(record) + "\n")

                # Fixed pause between requests.
                time.sleep(2)

    final_completed = load_completed(OUTPUT_FILE)
    print(f"\nDone. {len(final_completed)}/{total} results saved to {OUTPUT_FILE}")


if __name__ == "__main__":
    main()

analyze_dashes.py

import json
import re
from collections import defaultdict, Counter
import sys

# Labels of the dash usages that are counted; the same labels appear in PATTERNS.
# NOTE(review): CATEGORIES is not referenced by the code visible in this file.
CATEGORIES = [
    "em_dash_spaced", "em_dash_unspaced", "en_dash_spaced",
]

# (label, compiled regex) pairs. Every pattern requires a word character
# directly before and directly after the matched text (lookbehind/lookahead),
# so a dash next to punctuation or at a line edge is not counted.
#   em_dash_spaced:   an em dash (U+2014) with one space on each side
#   em_dash_unspaced: an em dash (U+2014) with no surrounding spaces
#   en_dash_spaced:   an en dash (U+2013) with one space on each side
PATTERNS = [
    ("em_dash_spaced", re.compile(r"(?<=\w) — (?=\w)")),
    ("em_dash_unspaced", re.compile(r"(?<=\w)—(?=\w)")),
    ("en_dash_spaced", re.compile(r"(?<=\w) – (?=\w)")),
]


def find_matches(text):
    """Find every dash usage in *text* that one of PATTERNS recognises.

    Returns:
        A list of ``(label, start, end)`` tuples ordered by ``start``, where
        ``start``/``end`` are the match offsets within *text*.
    """
    found = [
        (name, match.start(), match.end())
        for name, regex in PATTERNS
        for match in regex.finditer(text)
    ]
    # Stable sort: entries with equal start keep their PATTERNS order.
    return sorted(found, key=lambda item: item[1])

def context_snippet(text, start, end):
    """Return ``text[start:end]`` with up to 45 characters of context on each side.

    Runs of whitespace (including newlines) inside the excerpt are collapsed
    to single spaces, and "..." is added on whichever side was cut short of
    the beginning or end of *text*.
    """
    window = 45

    # Clamp the excerpt to the bounds of the text.
    lo = start - window
    if lo < 0:
        lo = 0
    hi = end + window
    if hi > len(text):
        hi = len(text)

    words = text[lo:hi].split()
    lead = "..." if lo > 0 else ""
    tail = "..." if hi < len(text) else ""
    return lead + " ".join(words) + tail

def provider_from_model(model):
    """Return the part of *model* before its first "/" (all of it if there is none)."""
    provider, _, _ = model.partition("/")
    return provider

def main():
    """Report dash usage found in the responses stored in results_v2.jsonl.

    First prints one line per dash occurrence (label, model, prompt index and
    a context snippet). Then prints a Markdown table with one row per model,
    grouped by provider, giving the count of each dash category and the total
    number of dashes as a percentage of that model's response characters.
    Every model that has at least one record gets a row, including models
    that never used a dash.
    """
    dash_counters_by_model = defaultdict(Counter)
    char_counts_by_model = Counter()
    with open("results_v2.jsonl", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            record = json.loads(line)
            model = record["model"]
            # A stored response may be null; treat it as empty text.
            text = record["response"] or ""
            prompt_index = record["prompt_index"]

            # Registers the model even when it has no dashes (or no text), so
            # it still shows up in the summary below.
            char_counts_by_model[model] += len(text)

            for label, start, end in find_matches(text):
                dash_counters_by_model[model][label] += 1
                snippet = context_snippet(text, start, end)
                print(f"print_me [ {label:^16} ] {model:30s} p={prompt_index} {snippet}")

    # Five columns, matching the five cells printed per row.
    header = '| model | spaced em dashes | em dashes | en dashes | dashes as % of chars |'
    header_underlines = '| :--- | :---: | :---: | :---: | :---: |'

    # Group by provider using every model seen in the file, not only the
    # models that produced at least one match.
    providers = defaultdict(list)
    for model in char_counts_by_model:
        providers[provider_from_model(model)].append(model)

    print(f"\n{'=' * 80}")
    print("PER-MODEL SUMMARY")
    print(f"{'=' * 80}")
    print(header)
    print(header_underlines)
    for provider in sorted(providers):
        for model in sorted(providers[provider]):
            # defaultdict yields an empty Counter for dash-free models.
            c = dash_counters_by_model[model]
            char_count = char_counts_by_model[model]
            dash_count = sum(c.values())
            # Models whose responses were all empty have no characters to divide by.
            dash_ratio = 100.0 * dash_count / char_count if char_count else 0.0

            print(f"| {model} | {c['em_dash_spaced']} | {c['em_dash_unspaced']} | {c['en_dash_spaced']} | {dash_ratio:.3F}% |")


if __name__ == "__main__":
    main()