Source code for iode.iode_database.comments_database

import sys
from pathlib import Path
from typing import Union, Tuple, List, Optional, Any

# `typing.Self` only exists from Python 3.11 onwards. Compare the whole
# (major, minor) tuple rather than the minor number alone, so that the check
# does not depend on the major version being 3.
if sys.version_info >= (3, 11):
    from typing import Self
else:
    # Fallback used purely for annotations on older interpreters.
    Self = Any

import pandas as pd
from iode.common import IodeFileType
from iode.util import check_filepath, join_lines, table2str, JUSTIFY
from iode.time.period import Period
from iode.iode_database.abstract_database import IodeDatabase, PositionalIndexer
from iode.iode_cython import ImportFormats, TableLang
from iode.iode_cython import Comments as CythonComments



[docs]
class Comments(IodeDatabase):
    r"""
    IODE Comments database. 

    Each entry of the database associates a name (e.g. "ACAF") with a comment 
    stored as a string.

    Attributes
    ----------
    filename: str
    description: str

    Parameters
    ----------
    filepath: str, optional
        file containing the IODE comments to load.

    Returns
    -------
    Comments

    Notes
    -----
    Calling ``Comments(...)`` directly raises a ``TypeError``. Instances are built 
    by the class method ``get_instance()``; the examples below use the ready-made 
    instance ``comments`` imported from the ``iode`` package.

    Examples
    --------
    >>> from iode import comments, SAMPLE_DATA_DIR
    >>> comments.load(f"{SAMPLE_DATA_DIR}/fun.cmt")     # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    Loading .../fun.cmt
    317 objects loaded 
    >>> len(comments)
    317
    >>> comments                                        # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    Workspace: Comments
    nb comments: 317
    filename: ...\tests\data\fun.cmt
    <BLANKLINE>
     name                                                  comments
    ACAF        Ondernemingen: ontvangen kapitaaloverdrachten.
    ACAG        Totale overheid: netto ontvangen kapitaaloverdrachten.
    AOUC        Kost per eenheid produkt.
    AQC         Kost per eenheid produkt: kapitaal en arbeid.
    BENEF       Ondernemingen: niet-uitgekeerde winsten.
    ...         ...
    ZF          Indexeringscoëfficiënt voor de lonen in de private sector.
    ZJ          Indexeringscoëfficiënt voor de sociale uitkeringen, vertraagd in de periode 1984-1988, voor de
                verrekening van de indexsprongen bij de vervangingsinkomens (1984, 1985, 1987).
    ZKF         Bezettingsgraad van de produktiecapaciteit.
    ZX          Saut d'index (correction en %)
    ZZ_         Marktsector (ondernemingen en zelfstandigen): loonquote (gemiddelde 1954-94).
    <BLANKLINE>
    """

[docs]
    def __init__(self, filepath: str=None):
        raise TypeError("This class cannot be instantiated directly.")


    @classmethod
    def get_instance(cls) -> Self:
        r"""
        Build an instance of the class without calling `__init__` 
        (which always raises) and attach a new Cython `Comments` object to it.

        Returns
        -------
        Comments
        """
        # __new__ allocates the object but skips __init__
        wrapper = cls.__new__(cls)
        wrapper._cython_instance = CythonComments()
        return wrapper

    def _load(self, filepath: str):
        self._cython_instance._load(filepath)

    def _subset(self, pattern: str, copy: bool) -> Self:
        # Create a new Comments wrapper, then replace its Cython object by the 
        # one returned by `initialize_subset` called on the current Cython object.
        subset = Comments.get_instance()
        fresh_backend = subset._cython_instance
        subset._cython_instance = self._cython_instance.initialize_subset(fresh_backend, pattern, copy)
        return subset

    @property
    def i(self) -> PositionalIndexer:
        r"""
        Positional indexer: get or set a comment by its position in the database 
        instead of by its name.

        As shown in the examples below, position 0 designates the first comment 
        and position -1 the last one.

        Returns
        -------
        PositionalIndexer
            indexer object bound to the current database.

        Examples
        --------
        >>> from iode import comments, SAMPLE_DATA_DIR
        >>> comments.load(f"{SAMPLE_DATA_DIR}/fun.cmt")       # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Loading .../fun.cmt
        317 objects loaded 
        >>> # get the first comment
        >>> comments.i[0]
        'Ondernemingen: ontvangen kapitaaloverdrachten.'
        >>> # get the last comment
        >>> comments.i[-1]
        'Marktsector (ondernemingen en zelfstandigen): loonquote\n(gemiddelde 1954-94).'
        >>> # update first comment
        >>> comments.i[0] = 'New Comment'
        >>> comments.i[0]
        'New Comment'
        >>> # update last comment
        >>> comments.i[-1] = 'New Comment'
        >>> comments.i[-1]
        'New Comment'
        """
        return PositionalIndexer(self)

    def _get_object(self, key: Union[str, int]) -> str:
        name = self._single_object_key_to_name(key)
        if not name in self:
            raise KeyError(f"Name '{name}' not found in the {type(self).__name__} workspace")
        comment = self._cython_instance._get_object(name)
        return comment

    def _set_object(self, key: Union[str, int], value: str):
        name = self._single_object_key_to_name(key)
        self._cython_instance._set_object(name, value)


[docs]
    def __getitem__(self, key: Union[str, List[str]]) -> Union[str, Self]:
        r"""
        Return the (subset of) comment(s) referenced by `key`.

        The lookup itself is delegated to the parent class implementation.

        The `key` can represent a single object name (e.g. "ACAF") or a list of object names ("ACAF;ACAG;AOUC") 
        or a pattern (e.g. "A*") or a list of sub-patterns (e.g. "A*;*_").
        
        If the `key` represents a list of object names or of sub-patterns, each name or sub-pattern is separated 
        by a `separator` character which is either a whitespace ` `, or a comma `,`, or a semi-colon `;`, or a 
        tabulation `\t`, or a newline `\n`.

        A (sub-)`pattern` is a list of characters representing a group of object names. 
        It includes some special characters which have a special meaning:
        
            - `*` : any character sequence, even empty
            - `?` : any character (one and only one)
            - `@` : any alphanumerical char [A-Za-z0-9]
            - `&` : any non alphanumerical char
            - `|` : any alphanumeric character or none at the beginning and end of a string 
            - `!` : any non-alphanumeric character or none at the beginning and end of a string 
            - `\` : escape the next character

        Note that the `key` can contain references to IODE lists which are prefixed with the symbol `$`.

        Parameters
        ----------
        key: str or list(str)
            (the list of) name(s) of the comment(s) to get.
            The list of comments to get can be specified by a pattern or by a list of sub-patterns (e.g. "A*;*_").

        Returns
        -------
        str or Comments
            the comment itself (a string) when `key` designates a single name, 
            otherwise a subset of the database.

        Examples
        --------
        >>> from iode import SAMPLE_DATA_DIR
        >>> from iode import comments
        >>> comments.load(f"{SAMPLE_DATA_DIR}/fun.cmt")       # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Loading .../fun.cmt
        317 objects loaded 

        >>> # a) get one Comment
        >>> comments["ACAF"]
        'Ondernemingen: ontvangen kapitaaloverdrachten.'

        >>> # b) get a subset of the Comments database using a pattern
        >>> comments_subset = comments["A*"]
        >>> comments_subset.names
        ['ACAF', 'ACAG', 'AOUC', 'AQC']
        
        >>> # c) get a subset of the Comments database using a list of names
        >>> comments_subset = comments[["ACAF", "AOUC", "BQY", "BVY"]]
        >>> comments_subset.names
        ['ACAF', 'AOUC', 'BQY', 'BVY']
        """
        return super().__getitem__(key)



[docs]
    def __setitem__(self, key: Union[str, List[str]], value: Union[str, List[str], dict, pd.Series, Self]):
        r"""
        Update/add a (subset of) comment(s) referenced by `key` from/to the Comments database.

        The assignment itself is delegated to the parent class implementation.

        The `key` can represent a single object name (e.g. "ACAF") or a list of object names ("ACAF;ACAG;AOUC") 
        or a pattern (e.g. "A*") or a list of sub-patterns (e.g. "A*;*_").
        
        If the `key` represents a list of object names or of sub-patterns, each name or sub-pattern is separated 
        by a `separator` character which is either a whitespace ` `, or a comma `,`, or a semi-colon `;`, or a 
        tabulation `\t`, or a newline `\n`.

        A (sub-)`pattern` is a list of characters representing a group of object names. 
        It includes some special characters which have a special meaning:
        
            - `*` : any character sequence, even empty
            - `?` : any character (one and only one)
            - `@` : any alphanumerical char [A-Za-z0-9]
            - `&` : any non alphanumerical char
            - `|` : any alphanumeric character or none at the beginning and end of a string 
            - `!` : any non-alphanumeric character or none at the beginning and end of a string 
            - `\` : escape the next character

        Note that the `key` can contain references to IODE lists which are prefixed with the symbol `$`.

        Parameters
        ----------
        key: str or list(str)
            (the list of) name(s) of the comment(s) to update/add.
            The list of comments to update/add can be specified by a pattern or by a list of sub-patterns 
            (e.g. "A*;*_").
        value: str or dict(str, str) or pandas.Series(str, str) or Comments
            (new) comment(s) value(s).
            As shown in the examples below, a single string is used when `key` designates one comment, 
            while a dict, a pandas Series or another Comments object provides one value per name.

        Examples
        --------
        >>> import pandas as pd
        >>> from iode import SAMPLE_DATA_DIR
        >>> from iode import comments
        >>> comments.load(f"{SAMPLE_DATA_DIR}/fun.cmt")       # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Loading .../fun.cmt
        317 objects loaded 
        
        >>> # a) add one comment
        >>> comments["BDY"] = "Difference net incomes (YN - YK)"
        >>> comments["BDY"]
        'Difference net incomes (YN - YK)'

        >>> # b) update one comment
        >>> comments["ACAF"]
        'Ondernemingen: ontvangen kapitaaloverdrachten.'
        >>> comments["ACAF"] = "New Value"
        >>> comments["ACAF"]
        'New Value'

        >>> # c) add/update several comments at once
        >>> # 1) using a dict of values
        >>> values = {"AOUC": "Updated AOUC from dict", "ACAF": "Updated ACAF from dict", 
        ...           "ACAG": "Updated ACAG from dict"}
        >>> comments["ACAF, ACAG, AOUC"] = values
        >>> comments["ACAF, ACAG, AOUC"]                # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Workspace: Comments
        nb comments: 3
        filename: ...fun.cmt
        <BLANKLINE>
        name	       comments       
        ACAF	Updated ACAF from dict
        ACAG	Updated ACAG from dict
        AOUC	Updated AOUC from dict

        >>> # 2) using a pandas series
        >>> data = ["Updated AOUC from series", "Updated ACAF from series", "Updated ACAG from series"]
        >>> series = pd.Series(data, index=["AOUC", "ACAF", "ACAG"])
        >>> comments["ACAF, ACAG, AOUC"] = series
        >>> comments["ACAF, ACAG, AOUC"]               # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Workspace: Comments
        nb comments: 3
        filename: ...fun.cmt
        <BLANKLINE>
        name	        comments        
        ACAF	Updated ACAF from series
        ACAG	Updated ACAG from series
        AOUC	Updated AOUC from series

        >>> # 3) using an iode Comments object (subset)
        >>> comments_subset = comments["ACAF, ACAG, AOUC"].copy()
        >>> comments_subset["ACAF"] = "Updated ACAF from another iode Comments database"
        >>> comments_subset["ACAG"] = "Updated ACAG from another iode Comments database"
        >>> comments_subset["AOUC"] = "Updated AOUC from another iode Comments database"
        >>> comments["ACAF, ACAG, AOUC"] = comments_subset
        >>> comments["ACAF, ACAG, AOUC"]                # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Workspace: Comments
        nb comments: 3
        filename: ...fun.cmt
        <BLANKLINE>
        name	                    comments                    
        ACAF	Updated ACAF from another iode Comments database
        ACAG	Updated ACAG from another iode Comments database
        AOUC	Updated AOUC from another iode Comments database

        >>> # d) working on a subset
        >>> # 1) get subset
        >>> comments_subset = comments["A*"]
        >>> comments_subset.names
        ['ACAF', 'ACAG', 'AOUC', 'AQC']
        >>> # 2) add a comment to the subset 
        >>> comments_subset["A0"] = "New Comment"
        >>> comments_subset["A0"]
        'New Comment'
        >>> # --> new comment also appears in the global workspace
        >>> "A0" in comments
        True
        >>> comments["A0"]
        'New Comment'
        >>> # 3) update a comment in the subset
        >>> comments_subset["A0"] = "Updated Comment"
        >>> comments_subset["A0"]
        'Updated Comment'
        >>> # --> comment is also updated in the global workspace
        >>> comments["A0"]
        'Updated Comment'
        """
        super().__setitem__(key, value)



[docs]
    def __delitem__(self, key: Union[str, List[str]]):
        r"""
        Remove the (subset of) comment(s) referenced by `key` from the Comments database.

        The removal itself is delegated to the parent class implementation.
        As shown in the last example below, deleting a comment from a subset 
        also deletes it from the database the subset was taken from.

        Parameters
        ----------
        key: str or list(str)
            (list of) name(s) of the comment(s) to be removed.
            The list of names can be given as a string pattern (e.g. "A*;*_").

        Examples
        --------
        >>> from iode import SAMPLE_DATA_DIR
        >>> from iode import comments
        >>> comments.load(f"{SAMPLE_DATA_DIR}/fun.cmt")       # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Loading .../fun.cmt
        317 objects loaded 

        >>> # a) delete one comment
        >>> comments.get_names("A*")
        ['ACAF', 'ACAG', 'AOUC', 'AQC']
        >>> del comments["ACAF"]
        >>> comments.get_names("A*")
        ['ACAG', 'AOUC', 'AQC']

        >>> # b) delete several comments at once using a pattern
        >>> del comments["A*"]
        >>> comments.get_names("A*")
        []

        >>> # c) delete several comments at once using a list of names
        >>> comments.get_names("B*")
        ['BENEF', 'BENEF_', 'BQY', 'BVY']
        >>> del comments[["BENEF", "BQY"]]
        >>> comments.get_names("B*")
        ['BENEF_', 'BVY']

        >>> # delete one comment from a subset of the global database
        >>> comments_subset = comments["D*"]
        >>> comments_subset.names
        ['DPU', 'DPUF', 'DPUG', 'DPUGO', 'DPUH', 'DPUU', 'DTF', 'DTFX', 'DTH', 'DTH1', 'DTH1C', 'DTHX']
        >>> del comments_subset["DPUGO"]
        >>> comments_subset.names
        ['DPU', 'DPUF', 'DPUG', 'DPUH', 'DPUU', 'DTF', 'DTFX', 'DTH', 'DTH1', 'DTH1C', 'DTHX']
        >>> # NOTE: the comment has also been deleted from the global database
        >>> "DPUGO" in comments
        False
        >>> comments.get_names("D*")
        ['DPU', 'DPUF', 'DPUG', 'DPUH', 'DPUU', 'DTF', 'DTFX', 'DTH', 'DTH1', 'DTH1C', 'DTHX']
        """
        super().__delitem__(key)



[docs]
    def copy_from(self, input_files: Union[str, List[str]], names: Union[str, List[str]]='*'):
        r"""
        Copy (a subset of) comments from the input file(s) 'input_files' into the current database.

        Parameters
        ----------
        input_file: str or list(str)
            file(s) from which the copied comments are read.
        names: str or list(str)
            list of comments to copy from the input file(s).
            Defaults to load all comments from the input file(s). 
        
        Examples
        --------
        >>> from iode import SAMPLE_DATA_DIR
        >>> from iode import comments
        >>> comments.load(f"{SAMPLE_DATA_DIR}/fun.cmt")       # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Loading .../fun.cmt
        317 objects loaded 
        >>> len(comments)
        317

        >>> # delete all comments with a name starting with 'A'
        >>> comments.remove("A*")
        >>> comments.get_names("A*")
        []

        >>> # load all comments with a name starting with 'A'
        >>> comments.copy_from(f"{SAMPLE_DATA_DIR}/fun.cmt", "A*")      # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Loading ...\fun.cmt
        317 objects loaded
        >>> comments.get_names("A*")
        ['ACAF', 'ACAG', 'AOUC', 'AQC']

        >>> comments.clear()
        >>> # load all comments
        >>> comments.copy_from(f"{SAMPLE_DATA_DIR}/fun.cmt")            # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Loading ...\fun.cmt
        317 objects loaded
        >>> len(comments)
        317
        """
        input_files, names = self._copy_from(input_files, names)
        self._cython_instance.copy_from(input_files, names)



[docs]
    def from_series(self, s: pd.Series):
        r"""
        Copy the pandas Series `s` into the IODE Comments database.
        The comment names to copy are deduced from the index of the Series.

        Parameters
        ----------
        s: Series
            pandas Series containing the comments to copy into the IODE Comments database.

        Notes
        -----
        The index of the passed Series is sorted in alphabetical order before 
        copying to IODE Comments database.

        See Also
        --------
        Comments.to_series

        Examples
        --------
        >>> from iode import comments
        >>> import pandas as pd
        >>> comments.clear()
        >>> len(comments)
        0

        >>> # create the pandas Series
        >>> names = ["A0", "A1", "B0", "B1", "C0", "C1"]
        >>> data = ["A zero", "A one", "B zero", "B one", "C zero", "C one"]
        >>> s = pd.Series(data=data, index=names, dtype=str, name="Comments")
        >>> # display the pandas series
        >>> s          # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        A0     A zero
        A1     A one
        B0    B zero
        B1     B one
        C0    C zero
        C1     C one
        Name: Comments, dtype: object

        >>> # load into the IODE Comments database
        >>> comments.from_series(s)
        >>> len(comments)
        6
        
        >>> comments.names             # doctest: +ELLIPSIS
        ['A0', 'A1', 'B0', 'B1', 'C0', 'C1']
        >>> comments["B0"]
        'B zero'
        >>> comments["C1"]
        'C one'
        """
        if not (self.is_global_workspace or self.is_detached):
            # check that all names in the pandas object are present in the current subset 
            self._check_same_names(self.names, s.index.tolist())

        for index, value in s.items():
            self._set_object(index, value)



[docs]
    def to_series(self) -> pd.Series:
        r"""
        Create a pandas Series from the current Comments database.
        The index of the returned Series is build from the Comments names.

        See Also
        --------
        Comments.from_series

        Examples
        --------
        >>> from iode import SAMPLE_DATA_DIR, comments
        >>> import pandas as pd
        >>> comments.load(f"{SAMPLE_DATA_DIR}/fun.cmt")       # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Loading .../fun.cmt
        317 objects loaded 
        >>> len(comments)
        317

        >>> # Export the IODE Comments database as a pandas Series
        >>> s = comments.to_series()
        >>> len(s)
        317

        >>> s.index.to_list()               # doctest: +ELLIPSIS
        ['ACAF', 'ACAG', 'AOUC', 'AQC', ..., 'ZJ', 'ZKF', 'ZX', 'ZZ_']
        >>> comments["ACAF"]                # doctest: +NORMALIZE_WHITESPACE
        'Ondernemingen: ontvangen kapitaaloverdrachten.' 
        >>> s["ACAF"]                       # doctest: +NORMALIZE_WHITESPACE
        'Ondernemingen: ontvangen kapitaaloverdrachten.' 
        >>> comments["ZZ_"]                 # doctest: +NORMALIZE_WHITESPACE
        'Marktsector (ondernemingen en zelfstandigen): loonquote\n(gemiddelde 1954-94).'
        >>> s["ZZ_"]                        # doctest: +NORMALIZE_WHITESPACE
        'Marktsector (ondernemingen en zelfstandigen): loonquote\n(gemiddelde 1954-94).'

        >>> # Export a subset of the IODE Comments database as a pandas Series
        >>> s = comments["A*;*_"].to_series()
        >>> len(s)
        34

        >>> s.index.to_list()               # doctest: +ELLIPSIS
        ['ACAF', 'ACAG', 'AOUC', 'AQC', ..., 'WIND_', 'WNF_', 'YDH_', 'ZZ_']
        >>> comments["ACAF"]                # doctest: +NORMALIZE_WHITESPACE
        'Ondernemingen: ontvangen kapitaaloverdrachten.' 
        >>> s["ACAF"]                       # doctest: +NORMALIZE_WHITESPACE
        'Ondernemingen: ontvangen kapitaaloverdrachten.' 
        >>> comments["ZZ_"]                 # doctest: +NORMALIZE_WHITESPACE
        'Marktsector (ondernemingen en zelfstandigen): loonquote\n(gemiddelde 1954-94).'
        >>> s["ZZ_"]                        # doctest: +NORMALIZE_WHITESPACE
        'Marktsector (ondernemingen en zelfstandigen): loonquote\n(gemiddelde 1954-94).'
        """        
        names = self.names
        data = [self._get_object(name) for name in names]
        return pd.Series(data=data, index=names, dtype=str, name=self.__class__.__name__)


    @property
    def series(self) -> pd.Series:
        r"""
        Create a pandas Series from the current Comments database.
        The index of the returned Series is built from the Comments names.

        This property simply returns the result of :py:meth:`Comments.to_series`, 
        so a new Series is created at each access.

        See Also
        --------
        Comments.to_series

        Examples
        --------
        >>> from iode import SAMPLE_DATA_DIR, comments
        >>> import pandas as pd
        >>> comments.load(f"{SAMPLE_DATA_DIR}/fun.cmt")       # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Loading .../fun.cmt
        317 objects loaded 
        >>> len(comments)
        317

        >>> # Export the IODE Comments database as a pandas Series
        >>> s = comments.series
        >>> len(s)
        317

        >>> s.index.to_list()               # doctest: +ELLIPSIS
        ['ACAF', 'ACAG', 'AOUC', 'AQC', ..., 'ZJ', 'ZKF', 'ZX', 'ZZ_']
        >>> comments["ACAF"]                # doctest: +NORMALIZE_WHITESPACE
        'Ondernemingen: ontvangen kapitaaloverdrachten.' 
        >>> s["ACAF"]                       # doctest: +NORMALIZE_WHITESPACE
        'Ondernemingen: ontvangen kapitaaloverdrachten.' 
        >>> comments["ZZ_"]                 # doctest: +NORMALIZE_WHITESPACE
        'Marktsector (ondernemingen en zelfstandigen): loonquote\n(gemiddelde 1954-94).'
        >>> s["ZZ_"]                        # doctest: +NORMALIZE_WHITESPACE
        'Marktsector (ondernemingen en zelfstandigen): loonquote\n(gemiddelde 1954-94).'

        >>> # Export a subset of the IODE Comments database as a pandas Series
        >>> s = comments["A*;*_"].series
        >>> len(s)
        34

        >>> s.index.to_list()               # doctest: +ELLIPSIS
        ['ACAF', 'ACAG', 'AOUC', 'AQC', ..., 'WIND_', 'WNF_', 'YDH_', 'ZZ_']
        >>> comments["ACAF"]                # doctest: +NORMALIZE_WHITESPACE
        'Ondernemingen: ontvangen kapitaaloverdrachten.' 
        >>> s["ACAF"]                       # doctest: +NORMALIZE_WHITESPACE
        'Ondernemingen: ontvangen kapitaaloverdrachten.' 
        >>> comments["ZZ_"]                 # doctest: +NORMALIZE_WHITESPACE
        'Marktsector (ondernemingen en zelfstandigen): loonquote\n(gemiddelde 1954-94).'
        >>> s["ZZ_"]                        # doctest: +NORMALIZE_WHITESPACE
        'Marktsector (ondernemingen en zelfstandigen): loonquote\n(gemiddelde 1954-94).'
        """
        return self.to_series()


[docs]
    @classmethod
    def convert_file(cls, input_file: Union[str, Path], input_format: Union[str, ImportFormats], 
                     save_file: Union[str, Path], rule_file: Union[str, Path], 
                     lang: Union[str, TableLang]=TableLang.ENGLISH, debug_file: Union[str, Path]=None):
        r"""
        Convert an external file representing IODE comments to an IODE comments file (.cmt). 
        The possible formats for the input file are:
          
          - `Ascii`: IODE-specific Ascii format for objects 
          - `Rotated Ascii`: Ascii format for variables with series in columns 
          - `DIF`: DIF format (Data Interchange Format) 
          - `DIF Belgostat`: (old) exchange format specific to Belgostat 
          - `NIS`: National Institute of Statistics Ascii format (old) 
          - `GEM`: Ascii format of Chronos software 
          - `PRN-Aremos`: Ascii format from Aremos software 
          - `TXT Belgostat`: (old) Belgostat-specific exchange format 

        The rule file is a simple text file containing the rules for: 
        
          - selecting the objects to be imported 
          - determining the objects names.

        Each rule consists of two fields:

          - the selection pattern, containing a description of the names concerned by the rule. 
            This mask is defined in the same way as the :py:meth:`~iode.Comments.search` method.
          - the transcoding algorithm for the names, which can contain : 
            - `+` : indicates that the character must be included in the name 
            - `-` : indicates that the character should be skipped 
            - any other character: included in the name 
        
        Example:

            B* C+-+          -> transforms B1234 into CB2, BCDEF into CBE, etc 
            *X ++++++++++    -> keeps names ending in X unchanged
            * ++++++++++     -> keeps all names unchanged

        Parameters
        ----------
        input_file : str or Path
            The path to the input file to be converted. 
        input_format : str or ImportFormats
            The format of the input file. Possible formats are ASCII, ROT_ASCII (Rotated Ascii), 
            DIF, BISTEL, NIS, GEM, PRN, TXT (TXT Belgostat).
            When passed as a string, it must be exactly the first letter of the name of one of 
            the ImportFormats members (e.g. 'A' for ImportFormats.ASCII).
        save_file : str or Path
            The path to the output file where the IODE comments will be saved.
        rule_file : str or Path
            The path to the rule file that defines the selection and transcoding rules. 
        lang : str or TableLang, optional
            The language of the extracted comments. 
            It is only used when a text appears in several languages in the input file. 
            Currently, only the Belgostat DIF format uses this value, allowing you to select 
            the language of the extracted comments.
            When passed as a string, it must be exactly the first letter of the name of one of 
            the TableLang members (e.g. 'E' for TableLang.ENGLISH).
            Default is ENGLISH.
        debug_file : str or Path, optional
            The path to the debug file where the debug information will be saved. 
            If not provided, the debug information will be printed to the console.

        Raises
        ------
        ValueError
            if `input_format` or `lang` is not one of the accepted one-letter codes.
        RuntimeError
            if the underlying conversion returns a negative value.

        Examples
        --------
        >>> from pathlib import Path
        >>> from iode import SAMPLE_DATA_DIR, comments, ImportFormats
        >>> output_dir = getfixture('tmp_path')

        >>> input_file = f"{SAMPLE_DATA_DIR}/fun_xode.ac.ref"
        >>> input_format = ImportFormats.ASCII
        >>> save_file = str(output_dir / "imported_cmt.cmt")
        >>> rule_file = f"{SAMPLE_DATA_DIR}/rules.txt"
        >>> debug_file = str(output_dir / "debug.log")

        >>> # print rules
        >>> with open(rule_file, "r") as f:         # doctest: +NORMALIZE_WHITESPACE
        ...     print(f.read())
        ...
        AC*  KK_--+++++++++++++
        *U   UU_++++++++++++++++
        >>> # get list of comments with a name starting with 'AC' 
        >>> # and ending with 'U' from the input file
        >>> with open(input_file, "r") as f:         # doctest: +NORMALIZE_WHITESPACE
        ...     for line in f:
        ...         name = line.split(" ")[0]
        ...         if name.startswith("AC") or name.endswith("U"):
        ...             print(line.strip())
        ...
        ACAF "Ondernemingen: ontvangen kapitaaloverdrachten."
        ACAG "Totale overheid: netto ontvangen kapitaaloverdrachten."
        DPU "Nominale afschrijvingen op de kapitaalvoorraad."
        DPUU "Nominale afschrijvingen op de kapitaalvoorraad (aangepast: inkomensoptiek)."
        IFU "Bruto kapitaalvorming: ondernemingen."
        IHU "Bruto kapitaalvorming: gezinnen."
        WBU "Totale loonmassa (inclusief werkgeversbijdragen)."
        >>> # import comments from input_file to save_file
        >>> # using the rules defined in rule_file
        >>> comments.convert_file(input_file, input_format, save_file, rule_file, 'E', debug_file)
        Reading object 1 : KK_AF
        Reading object 2 : KK_AG
        Reading object 3 : UU_DPU
        Reading object 4 : UU_DPUU
        Reading object 5 : UU_IFU
        Reading object 6 : UU_IHU
        Reading object 7 : UU_WBU
        7 objects saved
        >>> # check content of the saved file
        >>> comments.load(save_file)        # doctest: +ELLIPSIS
        Loading ...\imported_cmt.cmt
        7 objects loaded
        >>> comments                        # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
        Workspace: Comments
        nb comments: 7
        filename: ...\imported_cmt.cmt
        <BLANKLINE>
          name                                        comments
        KK_AF       Ondernemingen: ontvangen kapitaaloverdrachten.
        KK_AG       Totale overheid: netto ontvangen kapitaaloverdrachten.
        UU_DPU      Nominale afschrijvingen op de kapitaalvoorraad.
        UU_DPUU     Nominale afschrijvingen op de kapitaalvoorraad (aangepast: inkomensoptiek).
        UU_IFU      Bruto kapitaalvorming: ondernemingen.
        UU_IHU      Bruto kapitaalvorming: gezinnen.
        UU_WBU      Totale loonmassa (inclusief werkgeversbijdragen).
        <BLANKLINE>
        >>> # content of the debug file
        >>> with open(debug_file, "r") as f:         # doctest: +NORMALIZE_WHITESPACE
        ...     for line in f:
        ...         print(line.strip())
        ...
        ACAF -> KK_AF       (Rule KK_--+++++++++++++)
        ACAG -> KK_AG       (Rule KK_--+++++++++++++)
        DPU -> UU_DPU       (Rule UU_++++++++++++++++)
        DPUU -> UU_DPUU     (Rule UU_++++++++++++++++)
        IFU -> UU_IFU       (Rule UU_++++++++++++++++)
        IHU -> UU_IHU       (Rule UU_++++++++++++++++)
        WBU -> UU_WBU       (Rule UU_++++++++++++++++)
        """
        input_file = check_filepath(input_file, IodeFileType.FILE_ANY, file_must_exist=True)

        # The command expects a one-letter code: the first letter of the enum member name.
        # The codes are kept in a list (not a joined string) so that `in` is an exact 
        # match: with a string, `in` is a substring test which lets '' and multi-letter 
        # runs through and raises TypeError for non-string values.
        format_codes: List[str] = [item.name[0] for item in ImportFormats]
        if isinstance(input_format, ImportFormats):
            input_format = input_format.name[0]
        if not isinstance(input_format, str) or input_format not in format_codes:
            raise ValueError(f"Invalid input format '{input_format}'. "
                             f"Possible values are: {''.join(format_codes)}")
        
        save_file = check_filepath(save_file, IodeFileType.FILE_COMMENTS, file_must_exist=False)
        rule_file = check_filepath(rule_file, IodeFileType.FILE_ANY, file_must_exist=True)

        # same one-letter convention (and same exact-match check) for the language
        lang_codes: List[str] = [item.name[0] for item in TableLang]
        if isinstance(lang, TableLang):
            lang = lang.name[0]
        if not isinstance(lang, str) or lang not in lang_codes:
            raise ValueError(f"Invalid language '{lang}'. " 
                             f"Possible values are: {''.join(lang_codes)}") 

        # $FileImportCmt format rule infile outfile language [trace]
        # NOTE(review): the arguments are separated by single spaces; paths containing 
        # whitespace presumably cannot be passed this way -- confirm against the parser 
        # of the $FileImportCmt command.
        args = f"{input_format} {rule_file} {input_file} {save_file} {lang}"
        
        if debug_file:
            debug_file = check_filepath(debug_file, IodeFileType.FILE_LOG, file_must_exist=False)
            args += " " + debug_file

        # a negative return value signals a failed conversion
        res = CythonComments.convert_file(args)
        if res < 0:
            raise RuntimeError(f"Couldn't import comments from file '{input_file}'")


    def _str_table(self, names: List[str]) -> str:
        # Build the two-column (name, comments) table used to display the database.
        # Each comment goes through `join_lines` before being placed in its cell.
        cells = [join_lines(self._get_object(name)) for name in names]
        table = {"name": names, "comments": cells}
        # both columns are left-justified
        left_justified = dict.fromkeys(("name", "comments"), JUSTIFY.LEFT)
        return table2str(table, max_lines=10, justify_funcs=left_justified)


[docs]
    def print_to_file(self, filepath: Union[str, Path], names: Optional[Union[str, List[str]]]=None, format: Optional[str]=None):
        r"""
        Print the list of comments defined by `names` to the file `filepath` using the format `format`.

        The printing itself is delegated to the parent class implementation.

        Argument `format` must be in the list:

        - 'H' (HTML file)
        - 'M' (MIF file)
        - 'R' (RTF file)
        - 'C' (CSV file)

        If argument `format` is None (default), the *A2M* format will be used to print the output.

        If the filename does not contain an extension, it is automatically added based on 
        the value of `format`.

        If `names` is a string, it is considered as a *pattern* and the function will print 
        all comments matching the pattern. The following characters in *pattern* have a 
        special meaning:
        
            - `*` : any character sequence, even empty
            - `?` : any character (one and only one)
            - `@` : any alphanumerical char [A-Za-z0-9]
            - `&` : any non alphanumerical char
            - `|` : any alphanumeric character or none at the beginning and end of a string 
            - `!` : any non-alphanumeric character or none at the beginning and end of a string 
            - `\` : escape the next character

        If `names` is None, print all comments of the (subset of the) current database.

        Parameters
        ----------
        filepath: str or Path
            path to the file to print.
            If the filename does not contain an extension, it is automatically 
            added based on the value of the format argument.
        names: str or list of str, optional
            pattern or list of names of the comments to print.
            If None, print all comments of the (subset of the) current database.
            Defaults to None.
        format: str, optional
            format of the output file. Possible values are: 'H' (HTML file), 
            'M' (MIF file), 'R' (RTF file) or 'C' (CSV file).
            Defaults to None meaning that the comments will be dumped in the *A2M* format.

        Examples
        --------
        >>> from iode import comments, SAMPLE_DATA_DIR
        >>> output_dir = getfixture('tmp_path')
        >>> comments.load(f"{SAMPLE_DATA_DIR}/fun.cmt")             # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Loading .../fun.cmt
        317 objects loaded
        >>> comments.print_to_file(output_dir / "comments.csv", ["ACAF", "ACAG"])     # doctest: +ELLIPSIS
        Printing IODE objects definition to file '...comments.csv'...
        Printing ACAF ...
        Printing ACAG ...
        Print done
        >>> with open(output_dir / "comments.csv") as f:                     # doctest: +NORMALIZE_WHITESPACE
        ...     print(f.read())
        ...
        " - ACAF : Ondernemingen : ontvangen kapitaaloverdrachten."
        " - ACAG : Totale overheid : netto ontvangen kapitaaloverdrachten."
        <BLANKLINE>
        """
        super().print_to_file(filepath, names, format)


    def __hash__(self) -> int:
        r"""
        Return a hash value for the current Comments database.

        The value is computed by the parent class implementation. As illustrated 
        below, it changes when a comment is renamed, modified, added or deleted, 
        and it is back to its original value once the change is reverted.

        Returns
        -------
        int

        Examples
        --------
        >>> from iode import SAMPLE_DATA_DIR, comments
        >>> comments.load(f"{SAMPLE_DATA_DIR}/fun.cmt")       # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Loading .../fun.cmt
        317 objects loaded 
        >>> len(comments)
        317
        >>> original_hash = hash(comments)
        
        >>> # rename 1 comment
        >>> comments.rename("ACAF", "ACAF_")
        >>> original_hash == hash(comments)
        False
        >>> comments.rename("ACAF_", "ACAF")  # revert the change
        >>> original_hash == hash(comments)
        True

        >>> # modify one comment
        >>> original_comment = comments["ACAF"]
        >>> comments["ACAF"] = "modified comment"
        >>> original_hash == hash(comments)
        False
        >>> comments["ACAF"] = original_comment  # revert the change
        >>> original_hash == hash(comments)
        True

        >>> # delete a comment
        >>> original_comment = comments["ACAF"]
        >>> del comments["ACAF"]
        >>> original_hash == hash(comments)
        False
        >>> comments["ACAF"] = original_comment
        >>> original_hash == hash(comments)
        True

        >>> # add a comment
        >>> comments["NEW"] = "new comment"
        >>> original_hash == hash(comments)
        False
        >>> del comments["NEW"]
        >>> original_hash == hash(comments)
        True
        """
        return super().__hash__()



# Module-level instance of the Comments database, created through
# Comments.get_instance() because calling Comments() directly raises TypeError.
# This is presumably the object the docstring examples obtain with
# `from iode import comments` -- TODO confirm the re-export in the package.
comments: Comments = Comments.get_instance()