Tests should survive refactoring. If your test breaks when you rename a private function or change how you store data internally, it's testing the how, not the what.
You refactor. You don't change any behavior. You run the tests. 40 tests fail.
None of them caught bugs. They just happened to be coupled to:
You spend the next 3 hours "fixing" tests. You didn't fix bugs. You updated tests to match your refactor. This is waste.
// 😱 BAD: Testing that you call your own functions
// Deliberately brittle example: spies on UserService's own helper methods and
// asserts that they were invoked, without looking at what createUser() produces.
test('creates user', () => {
  const service = new UserService();
  const email = 'alice@example.com';
  const password = 'password123';

  // Spying on the service's own methods is what couples this test to the
  // implementation.
  const emailCheck = jest.spyOn(service, 'validateEmail');
  const passwordHash = jest.spyOn(service, 'hashPassword');

  service.createUser(email, password);

  expect(emailCheck).toHaveBeenCalledWith(email);
  expect(passwordHash).toHaveBeenCalledWith(password);
});
// This test will break if you:
// - Rename validateEmail()
// - Move validation logic elsewhere
// - Combine validateEmail and hashPassword into one function
// - Change the order of operations
// None of these are behavior changes!

// ✅ GOOD: Test what actually matters
// Calls createUser() and checks only observable results: the returned result
// object, the user found by email afterwards, and that the password verifies.
test('creates user with valid credentials', async () => {
  const email = 'alice@example.com';
  const plainPassword = 'password123';

  const outcome = await createUser(email, plainPassword);
  expect(outcome.success).toBe(true);
  expect(outcome.user.email).toBe(email);

  // Verify the user exists and the password is hashed.
  const stored = await findUserByEmail(email);
  expect(stored).toBeDefined();
  expect(stored.password).not.toBe(plainPassword); // hashed
  const matches = await verifyPassword(stored.password, plainPassword);
  expect(matches).toBe(true);
});
// This test only breaks if behavior changes:
// - User isn't created
// - Password isn't hashed
// - Email is wrong
// Refactor all you want—this test doesn't care HOW you do it.

# 😱 BAD: Coupled to internal implementation
def test_shopping_cart():
    """Deliberately brittle example: asserts on the private ``_items`` attribute.

    Adds one item through ``add_item`` and then checks the exact contents of
    ``cart._items`` instead of going through any public accessor.
    """
    cart = ShoppingCart()
    cart.add_item('apple', 1.50)

    # Testing internal storage format
    assert cart._items == {'apple': {'price': 1.50, 'quantity': 1}}
    assert len(cart._items) == 1
    assert cart._items['apple']['price'] == 1.50
# This breaks if you:
# - Change _items from dict to list
# - Rename _items to _products
# - Store items in a database
# - Change the internal data structure

# ✅ GOOD: Test through the public interface
def test_shopping_cart():
    """Check a one-item cart using only ``ShoppingCart``'s public methods.

    Adds one item, then asserts on ``total()``, ``item_count()``,
    ``contains()`` and the entries returned by ``get_items()``; no private
    attribute is read.
    """
    cart = ShoppingCart()
    cart.add_item('apple', 1.50)

    # Aggregate views of the cart.
    assert cart.total() == 1.50
    assert cart.item_count() == 1
    assert cart.contains('apple')

    # The single listed item carries the name and price that were added.
    items = cart.get_items()
    assert len(items) == 1
    assert items[0].name == 'apple'
    assert items[0].price == 1.50
# Refactor the internal storage all you want.
# Use a dict, list, database, Redis—doesn't matter.
# This test only cares about the contract.

// 😱 BAD: Testing the exact sequence of operations
// TestProcessOrder is the deliberately brittle variant: it builds the order
// service from mocks and asserts on how those mocks were called, not on what
// ProcessOrder produced.
func TestProcessOrder(t *testing.T) {
	validator := &MockValidator{}
	stock := &MockInventory{}
	gateway := &MockPayment{}

	svc := NewOrderService(validator, stock, gateway)
	// NOTE(review): order is not defined in this snippet; presumably a fixture
	// declared elsewhere — confirm.
	svc.ProcessOrder(order)

	// Testing implementation details
	if calls := validator.CallCount; calls != 1 {
		t.Error("validator should be called exactly once")
	}
	if !stock.CalledBefore(gateway) {
		t.Error("inventory must be checked before payment")
	}
}
// This breaks if you:
// - Change the order of validation and inventory check
// - Add caching so validator isn't always called
// - Parallelize inventory and payment checks

// ✅ GOOD: Test outcomes, not implementation
// TestProcessOrder checks ProcessOrder purely by its outcome: the result's
// Success flag, the remaining widget count, and the user's remaining balance.
func TestProcessOrder(t *testing.T) {
	// Seed the inventory and the payment gateway with known starting state.
	stock := NewInMemoryInventory()
	stock.Add("widget", 10)
	gateway := NewFakePaymentGateway()
	gateway.AddBalance("user123", 100.00)

	svc := NewOrderService(stock, gateway)
	order := Order{
		UserID: "user123",
		Items:  []Item{{Name: "widget", Quantity: 2}},
		Total:  20.00,
	}

	outcome := svc.ProcessOrder(order)

	// Assert only on state visible through the collaborators' own accessors.
	assert.True(t, outcome.Success)
	assert.Equal(t, 8, stock.Count("widget"))          // inventory reduced
	assert.Equal(t, 80.00, gateway.Balance("user123")) // payment charged
}
// Refactor all you want. Optimize the order of operations.
// This test doesn't care HOW you do it, just that:
// 1. The order succeeds
// 2. Inventory is updated
// 3. Payment is charged

Your test should break when behavior changes, not when code changes.
If you refactor and tests fail—but the feature still works—those tests were bad. They were testing how you did it, not what you promised.
"A good test is indifferent to refactoring. It only cares if you broke your promise."