Pydantic v2 Migration

22-05-2025

Settings

# v1
from pydantic import BaseSettings
 
class Settings(BaseSettings):
    # v1 style: the setting is given a default value but no type annotation.
    VALUE = '123'
 
# v2
from pydantic_settings import BaseSettings
 
class Settings(BaseSettings):
    # v2 style: BaseSettings is imported from the separate pydantic_settings
    # package, and the field carries an explicit type annotation.
    VALUE: str = '123'

Model configuration

# v1
from pydantic import BaseModel, Extra
 
class User(BaseModel):
    name: str
 
    # v1 style: model options are attributes of a nested Config class;
    # here `extra` is set to Extra.forbid.
    class Config:
        extra = Extra.forbid
 
# v2
from pydantic import ConfigDict, BaseModel
 
class User(BaseModel):
    name: str
 
    # v2 style: options are passed to ConfigDict and assigned to the
    # class-level `model_config` attribute; "forbid" is a plain string here.
    model_config = ConfigDict(extra="forbid")

Type coercion

# Per-field opt-in: coerce_numbers_to_str=True is passed to Field(...) for
# this one `str` attribute only.
identifier: str = Field(..., coerce_numbers_to_str=True)

Handling .env files during test cases

from pydantic_settings import BaseSettings, SettingsConfigDict
 
class Settings(BaseSettings):
    APP_URL: str = "http://localhost:8000"
    # Default configuration: values are read from the `.env` file named here.
    model_config = SettingsConfigDict(env_file='.env', env_file_encoding='utf-8')
 
 
# Test setup: _env_file=None is passed explicitly at instantiation, which is
# meant to stop a local .env file from being loaded during test runs
# (NOTE(review): confirm the override behaviour against the pydantic-settings docs).
settings = Settings(_env_file=None, _env_file_encoding='utf-8')

Annotated types

# pydantic v2: the constraint is expressed with pydantic's own Field
from typing import Annotated
from pydantic import Field
 
# int annotated with Field(gt=0)
PositiveInt = Annotated[int, Field(gt=0)] 
 
# annotated-types: the same alias name, built from Gt instead of Field
from annotated_types import Gt
 
# int annotated with Gt(0); this rebinds the PositiveInt name defined above
PositiveInt = Annotated[int, Gt(0)]
 
# custom validation and serialization
from typing import Annotated
 
from pydantic import (
    AfterValidator,
    PlainSerializer,
    TypeAdapter,
    WithJsonSchema,
)
 
# Float alias carrying three pieces of Annotated metadata:
TruncatedFloat = Annotated[
    float,
    # validator lambda: rounds the value to one decimal place
    AfterValidator(lambda x: round(x, 1)),
    # serializer lambda: formats the value with '.1e' and declares a str return type
    PlainSerializer(lambda x: f'{x:.1e}', return_type=str),
    # JSON schema {'type': 'string'}, supplied with mode='serialization'
    WithJsonSchema({'type': 'string'}, mode='serialization'),
]

Constrained types

  • constrained functions are deprecated
  • use Annotated with Field or StringConstraints instead of the constrained type functions (constr, conint, etc.)
# v1
from pydantic import BaseModel, constr
 
class Foo(BaseModel):
    # pydantic v1 spells the regular-expression keyword `regex`;
    # v2 renamed it to `pattern` (as used in the v2 example that follows).
    bar: constr(strip_whitespace=True, to_upper=True, regex=r'^[A-Z]+$')
 
# v2
from typing import Annotated
from pydantic import BaseModel, StringConstraints
 
class Foo(BaseModel):
    # v2 style: the field type is plain `str`, and the constraints travel as
    # Annotated metadata in a StringConstraints object instead of a constr() call.
    bar: Annotated[
        str,
        StringConstraints(
            strip_whitespace=True, to_upper=True, pattern=r'^[A-Z]+$'
        ),
    ]

Unions (and discriminated fields)

  • https://docs.pydantic.dev/latest/concepts/unions
  • unions only require one member to be valid (ie fields/items/values)
    • which members should you validate against, and in which order, and what validation errors should be raised
  • left-to-right mode and smart mode may lead to unexpected validations
  • the recommended approach is to use discriminators
    • you can use string literals or callable functions
from typing import Literal
from pydantic import BaseModel, Field, ValidationError
 
class Cat(BaseModel):
    # Tag field: fixed to the literal 'cat'. Model names this field as the
    # discriminator of its Cat | Dog union.
    pet_type: Literal['cat']
    meows: int
 
class Dog(BaseModel):
    # Tag field: fixed to the literal 'dog'. Model names this field as the
    # discriminator of its Cat | Dog union.
    pet_type: Literal['dog']
    barks: float
 
class Model(BaseModel):
    # `pet` is a Cat | Dog union; the discriminator names the `pet_type`
    # field that both Cat and Dog declare as a Literal.
    pet: Cat | Dog = Field(discriminator='pet_type')
    n: int
 
# Example input whose pet_type is 'dog', the Literal value declared on Dog.
Model(pet={'pet_type': 'dog', 'barks': 3.14}, n=1)

Validators

import datetime
from typing import Any, Self  # typing.Self requires Python 3.11+

from pydantic import BaseModel, field_validator, model_validator


class DateModel(BaseModel):
    """Example model combining before/after field validators and a model validator."""

    identifiers: list[int]
    start_date: datetime.date
    end_date: datetime.date

    @field_validator('identifiers', mode='before')
    @classmethod
    def ensure_list(cls, value: Any) -> Any:
        """Wrap a lone value in a list; pass an existing list through unchanged.

        Declared with mode='before', so ``value`` is the raw, unvalidated input.
        """
        return value if isinstance(value, list) else [value]

    @field_validator('identifiers', mode='after')
    @classmethod
    def validate_even(cls, value: list[int]) -> list[int]:
        """Reject the field if any identifier is odd.

        Declared with mode='after' on a ``list[int]`` field, so ``value`` is the
        whole list and each item has to be checked individually.

        Raises:
            ValueError: naming the first odd identifier found.
        """
        for identifier in value:
            if identifier % 2 == 1:
                raise ValueError(f'{identifier} is not an even number')
        # The validator's return value is what the field ends up holding,
        # so the list must be handed back on success.
        return value

    @model_validator(mode="after")
    def validate_dates(self) -> Self:
        """Check that ``start_date`` does not come after ``end_date``.

        Raises:
            ValueError: if ``start_date`` is later than ``end_date``.
        """
        if self.start_date > self.end_date:
            raise ValueError("Start date comes after end date")
        # Return the instance, as the declared ``Self`` return type promises.
        return self

Heuristics for which mode to use:

  • before validators
    • we want to ensure the correct types and formats (given unvalidated data)
    • run any pre-processing (ie transformations, character handling, filtering etc)
    • these shouldn't raise errors directly
  • after validators
    • we want to ensure the correct business logic (given validated data)
    • run any validations (ie sanity checks)
    • these should raise errors directly

Serialisation

  • serialise and dump are used interchangeably
  • use model.model_dump(...)
  • some types may not be json serializable (ie pandas._libs.missing.NAType), especially when passing to response types in a FastAPI service
    • use jsonable_encoder from fastapi.encoders

Pandera types and pydantic types

import pandas as pd
import pandera.pandas as pa
from pandera.typing import DataFrame, Series
import pydantic
 
class PanderaModel(pa.DataFrameModel):
    # A Series[str] column declared with pa.Field(unique=True).
    str_col: Series[str] = pa.Field(unique=True)
 
class PydanticModel(pydantic.BaseModel):
    x: int
    # pandera's DataFrame generic, parameterised with PanderaModel, is used
    # directly as the annotation of a pydantic field.
    df: DataFrame[PanderaModel]

Follow-ups

  • min_length vs min_items when applied to strings or list of strings
  • for example, if I want a list of strings with at least 1 element, where each string has at least 1 character, what is the correct syntax?
class Request(BaseModel):
    # v1 style: min_length is given to constr, i.e. it is a constraint on the string.
    code: constr(min_length=1)
    # v1 style: min_items is given to conlist, i.e. it is a constraint on the list;
    # item_type is plain `str`, so no length constraint is declared for the items.
    expressions: conlist(item_type=str, min_items=1)
 
class Request(BaseModel):
    # v2 style: min_length sits in StringConstraints attached to the str type.
    code: Annotated[str, StringConstraints(min_length=1)]
    # v2 style: Field(min_length=1) is attached to the List[str] annotation as a
    # whole; the inner `str` carries no constraint of its own.
    # NOTE(review): a per-item minimum length presumably needs the item type itself
    # annotated (e.g. with StringConstraints) — confirm against the pydantic docs.
    expressions: Annotated[List[str], Field(min_length=1)]