diff --git a/commitizen/config/yaml_config.py b/commitizen/config/yaml_config.py index 1e9610e17..9093c919f 100644 --- a/commitizen/config/yaml_config.py +++ b/commitizen/config/yaml_config.py @@ -27,10 +27,11 @@ def __init__(self, *, data: bytes | str, path: Path) -> None: self._parse_setting(data) def init_empty_config_content(self) -> None: - with smart_open( - self.path, "a", encoding=self._settings["encoding"] - ) as json_file: - yaml.dump({"commitizen": {}}, json_file, explicit_start=True) + # Write YAML as UTF-8; YAML 1.2 requires UTF-8/16/32. + with smart_open(self.path, "a", encoding="utf-8") as yaml_file: + yaml.dump( + {"commitizen": {}}, yaml_file, explicit_start=True, allow_unicode=True + ) def contains_commitizen_section(self) -> bool: with self.path.open("rb") as yaml_file: @@ -60,9 +61,8 @@ def set_key(self, key: str, value: object) -> Self: config_doc = yaml.load(yaml_file, Loader=yaml.FullLoader) config_doc["commitizen"][key] = value - with smart_open( - self.path, "w", encoding=self._settings["encoding"] - ) as yaml_file: - yaml.dump(config_doc, yaml_file, explicit_start=True) + # Write YAML as UTF-8; YAML 1.2 requires UTF-8/16/32. + with smart_open(self.path, "w", encoding="utf-8") as yaml_file: + yaml.dump(config_doc, yaml_file, explicit_start=True, allow_unicode=True) return self diff --git a/tests/test_conf.py b/tests/test_conf.py index c004e96e1..a348e511a 100644 --- a/tests/test_conf.py +++ b/tests/test_conf.py @@ -2,6 +2,7 @@ import json import os +import re from pathlib import Path from typing import Any @@ -497,3 +498,39 @@ def test_init_with_invalid_content(self, tmp_path, config_file): with pytest.raises(InvalidConfigurationError) as excinfo: YAMLConfig(data=existing_content, path=path) assert config_file in str(excinfo.value) + + def test_set_key_preserves_unicode(self, tmp_path, config_file): + """Regression test for #1164: emoji and other non-ASCII characters + must be preserved verbatim, not escaped to ``\\Uxxxx`` sequences.""" + path = tmp_path / "commitizen" / config_file + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + "commitizen:\n" + ' bump_message: "🚀 chore: bump $current_version to $new_version"\n', + encoding="utf-8", + ) + + yaml_config = YAMLConfig(data=path.read_text(encoding="utf-8"), path=path) + yaml_config.set_key("version", "0.1.1") + + rewritten = path.read_text(encoding="utf-8") + assert "🚀" in rewritten + assert not re.search(r"\\U[0-9a-fA-F]{8}", rewritten) + + def test_init_empty_config_content_passes_allow_unicode( + self, tmp_path, config_file, mocker + ): + """``init_empty_config_content`` must call ``yaml.dump`` with + ``allow_unicode=True`` so that any non-ASCII default content (for + future maintainers) is written verbatim. The current default + (``{"commitizen": {}}``) is ASCII-only, so this asserts the + keyword is passed rather than its observable behaviour.""" + path = tmp_path / "commitizen" / config_file + path.parent.mkdir(parents=True, exist_ok=True) + dump_spy = mocker.spy(yaml, "dump") + + yaml_config = YAMLConfig(data="{}", path=path) + yaml_config.init_empty_config_content() + + dump_spy.assert_called_once() + assert dump_spy.call_args.kwargs.get("allow_unicode") is True