# Diverse synthetic generation across an axis grid
import json, itertools
from openai import OpenAI
# Shared API client used by generate_pair for every request.
# NOTE(review): constructed with no arguments, so credentials/config must come
# from the library's defaults — presumably environment variables; confirm.
client = OpenAI()
def generate_pair(
    topic: str,
    difficulty: str,
    *,
    model: str = "gpt-4o",
    temperature: float = 0.8,
) -> dict:
    """Ask the chat model for one question/answer training pair.

    The system prompt requests a JSON object with ``question`` and ``answer``
    keys at the given difficulty; the user message carries the topic.

    Args:
        topic: Subject the generated question should cover.
        difficulty: Difficulty label interpolated into the system prompt.
        model: Chat model name passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The parsed JSON object, containing non-empty string ``question`` and
        ``answer`` entries.

    Raises:
        ValueError: If the reply has no text content, is not valid JSON, is
            not a JSON object, or lacks a usable ``question``/``answer``.
    """
    system_prompt = (
        "Generate a training example for a Python coding assistant. "
        "Output JSON: {\"question\": \"...\", \"answer\": \"...\"}. "
        "Answer should include code AND a one-paragraph explanation. "
        f"Difficulty: {difficulty}."
    )
    r = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Topic: {topic}"},
        ],
        response_format={"type": "json_object"},
        temperature=temperature,
    )

    raw = r.choices[0].message.content
    # The content field may be empty/None (e.g. when the model declines to
    # answer); fail with a clear message instead of a TypeError from json.
    if not raw:
        raise ValueError(
            f"Model returned no content for topic={topic!r}, "
            f"difficulty={difficulty!r}"
        )

    # json.JSONDecodeError subclasses ValueError, so truncated or otherwise
    # invalid JSON surfaces as the same exception type documented above.
    pair = json.loads(raw)
    if not isinstance(pair, dict):
        raise ValueError(
            f"Expected a JSON object, got {type(pair).__name__}"
        )

    # Validate here so a malformed pair is rejected at the source rather than
    # ending up as a broken chat message in the dataset.
    for key in ("question", "answer"):
        value = pair.get(key)
        if not isinstance(value, str) or not value.strip():
            raise ValueError(
                f"Model output has no usable {key!r} string "
                f"(topic={topic!r}, difficulty={difficulty!r})"
            )
    return pair
# Axes of the generation grid: Python subject areas and difficulty labels.
TOPICS = [
    "list comprehensions",
    "decorators",
    "async/await",
    "file handling",
    "error handling",
    "classes",
    "generators",
]
DIFFICULTIES = [
    "beginner",
    "intermediate",
    "advanced",
]
# Build the dataset: request a fixed number of pairs for every
# (topic, difficulty) cell and wrap each one as a chat-format example.
PER_CELL = 10  # generations requested per grid cell
examples = []
skipped = 0
for topic, diff in itertools.product(TOPICS, DIFFICULTIES):
    for _ in range(PER_CELL):
        # A single malformed model reply must not abort the whole run and
        # discard everything generated so far, so bad pairs are counted and
        # skipped. KeyError: missing "question"/"answer"; ValueError: invalid
        # JSON or rejected output; TypeError: empty reply or non-object JSON.
        try:
            pair = generate_pair(topic, diff)
            question, answer = pair["question"], pair["answer"]
        except (KeyError, TypeError, ValueError) as err:
            skipped += 1
            print(f"Skipping one {topic}/{diff} generation: {err!r}")
            continue
        examples.append({"messages": [
            {"role": "system", "content": "You are a Python expert."},
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer},
        ]})
print(f"Generated {len(examples)} examples")
if skipped:
    print(f"Skipped {skipped} malformed generations")
# Up to 7 topics × 3 difficulties × 10 = 210 examples (fewer if any were skipped)