Skip to content

Local

src.featureform.register.LocalProvider

The LocalProvider exposes the registration functions for LocalMode

Using the LocalProvider:

from featureform import local

transactions = local.register_file(
    name="transactions",
    variant="quickstart",
    description="A dataset of fraudulent transactions",
    path="transactions.csv"
)

Source code in src/featureform/register.py
class LocalProvider:
    """
    The LocalProvider exposes the registration functions for LocalMode

    **Using the LocalProvider:**
    ``` py
    from featureform import local

    transactions = local.register_file(
        name="transactions",
        variant="quickstart",
        description="A dataset of fraudulent transactions",
        path="transactions.csv"
    )
    ```
    """

    def __init__(self, registrar, provider):
        # Double-underscore attributes are name-mangled, so they stay private to this class.
        self.__registrar = registrar
        self.__provider = provider

    def name(self) -> str:
        """Return the name of the wrapped provider."""
        return self.__provider.name

    def register_file(self, name, description, path, variant="default", owner="", tags: List[str] = None, properties: dict = None):
        """Register a local file.

        **Examples**:
        ```
        transactions = local.register_file(
            name="transactions",
            variant="quickstart",
            description="A dataset of fraudulent transactions",
            path="transactions.csv"
        )
        ```
        Args:
            name (str): Name for how to reference the file later
            description (str): Description of the file
            path (str): Path to the file
            variant (str): File variant
            owner (str): Owner of the file. When left empty, the registrar's default owner is used
            tags (List[str]): Tags forwarded to the registrar with the source. Defaults to an empty list
            properties (dict): Properties forwarded to the registrar with the source. Defaults to an empty dict

        Returns:
            source (LocalSource): source
        """
        if owner == "":
            owner = self.__registrar.must_get_default_owner()
        # Build fresh containers per call; a mutable default would be shared by every call.
        tags = [] if tags is None else tags
        properties = {} if properties is None else properties
        # Store the file as a source
        self.__registrar.register_primary_data(name=name,
                                               variant=variant,
                                               location=SQLTable(path),
                                               provider=self.__provider.name,
                                               owner=owner,
                                               description=description,
                                               tags=tags,
                                               properties=properties)
        return LocalSource(self.__registrar, name, owner, variant, self.name(), path, description)

    def insert_provider(self):
        """Insert a row describing this provider into the "providers" table of the SQLite metadata store."""
        sqldb = SQLiteMetadata()
        try:
            # Store a new provider row
            # NOTE(review): "sources" and "status" are passed as literal strings -- presumably
            # placeholders for those columns; confirm against the table schema.
            sqldb.insert("providers",
                         self.__provider.name,
                         "Provider",
                         self.__provider.description,
                         self.__provider.config.type(),
                         self.__provider.config.software(),
                         self.__provider.team,
                         "sources",
                         "status",
                         str(self.__provider.config.serialize(), 'utf-8')
                         )
        finally:
            # Close the store even when building or inserting the row raises.
            sqldb.close()

    def df_transformation(self,
                          variant: str = "default",
                          owner: Union[str, UserRegistrar] = "",
                          name: str = "",
                          description: str = "",
                          inputs: list = None,
                          tags: List[str] = None,
                          properties: dict = None):
        """
        Register a Dataframe transformation source. The local.df_transformation decorator takes the contents
        of the following function and executes the code it contains at serving time.

        The name of the function is used as the name of the source when being registered.

        The specified inputs are loaded into dataframes that can be accessed using the function parameters.

        **Examples**:
        ``` py
        @local.df_transformation(inputs=[("source", "one"), ("source", "two")]) # Sources are added as inputs
        def average_user_transaction(df_one, df_two):                           # Sources can be manipulated by adding them as params
            return df_one.groupby("CustomerID")["TransactionAmount"].mean()
        ```

        Args:
            name (str): Name of source
            variant (str): Name of variant
            owner (Union[str, UserRegistrar]): Owner
            description (str): Description of primary data to be registered
            inputs (list[Tuple(str, str)]): A list of Source NameVariant Tuples to input into the transformation. Defaults to an empty list
            tags (List[str]): Tags forwarded to the registrar with the source. Defaults to an empty list
            properties (dict): Properties forwarded to the registrar with the source. Defaults to an empty dict

        Returns:
            source (ColumnSourceRegistrar): Source
        """
        # Build fresh containers per call; a mutable default would be shared by every call.
        inputs = [] if inputs is None else inputs
        tags = [] if tags is None else tags
        properties = {} if properties is None else properties
        return self.__registrar.df_transformation(name=name,
                                                  variant=variant,
                                                  owner=owner,
                                                  provider=self.name(),
                                                  description=description,
                                                  inputs=inputs,
                                                  tags=tags,
                                                  properties=properties)

    def sql_transformation(self,
                           variant: str = "default",
                           owner: Union[str, UserRegistrar] = "",
                           name: str = "",
                           description: str = "",
                           tags: List[str] = None,
                           properties: dict = None):
        """
        Register a SQL transformation source. The local.sql_transformation decorator takes the returned string in the
        following function and executes it as a SQL Query.

        The name of the function is the name of the resulting source.

        Sources for the transformation can be specified by adding the Name and Variant in brackets '{{ name.variant }}'.
        The correct source is substituted when the query is run.

        **Examples**:
        ``` py
        @local.sql_transformation(variant="quickstart")
        def average_user_transaction():
            return "SELECT CustomerID as user_id, avg(TransactionAmount) as avg_transaction_amt from" \
            " {{transactions.v1}} GROUP BY user_id"
        ```

        Args:
            name (str): Name of source
            variant (str): Name of variant
            owner (Union[str, UserRegistrar]): Owner
            description (str): Description of primary data to be registered
            tags (List[str]): Tags forwarded to the registrar with the source. Defaults to an empty list
            properties (dict): Properties forwarded to the registrar with the source. Defaults to an empty dict

        Returns:
            source (ColumnSourceRegistrar): Source
        """
        # Build fresh containers per call; a mutable default would be shared by every call.
        tags = [] if tags is None else tags
        properties = {} if properties is None else properties
        return self.__registrar.sql_transformation(name=name,
                                                   variant=variant,
                                                   owner=owner,
                                                   provider=self.name(),
                                                   description=description,
                                                   tags=tags,
                                                   properties=properties)

df_transformation(variant='default', owner='', name='', description='', inputs=[], tags=[], properties={})

Register a Dataframe transformation source. The local.df_transformation decorator takes the contents of the following function and executes the code it contains at serving time.

The name of the function is used as the name of the source when being registered.

The specified inputs are loaded into dataframes that can be accessed using the function parameters.

Examples:

@local.df_transformation(inputs=[("source", "one"), ("source", "two")]) # Sources are added as inputs
def average_user_transaction(df_one, df_two):                           # Sources can be manipulated by adding them as params
    # The inputs are available as dataframes through the function parameters.
    return df_one.groupby("CustomerID")["TransactionAmount"].mean()

Parameters:

Name Type Description Default
name str

Name of source

''
variant str

Name of variant

'default'
owner Union[str, UserRegistrar]

Owner

''
description str

Description of primary data to be registered

''
inputs list[Tuple(str, str)]

A list of Source NameVariant Tuples to input into the transformation

[]

Returns:

Name Type Description
source ColumnSourceRegistrar

Source

Source code in src/featureform/register.py
def df_transformation(self,
                      variant: str = "default",
                      owner: Union[str, UserRegistrar] = "",
                      name: str = "",
                      description: str = "",
                      inputs: list = None,
                      tags: List[str] = None,
                      properties: dict = None):
    """
    Register a Dataframe transformation source. The local.df_transformation decorator takes the contents
    of the following function and executes the code it contains at serving time.

    The name of the function is used as the name of the source when being registered.

    The specified inputs are loaded into dataframes that can be accessed using the function parameters.

    **Examples**:
    ``` py
    @local.df_transformation(inputs=[("source", "one"), ("source", "two")]) # Sources are added as inputs
    def average_user_transaction(df_one, df_two):                           # Sources can be manipulated by adding them as params
        return df_one.groupby("CustomerID")["TransactionAmount"].mean()
    ```

    Args:
        name (str): Name of source
        variant (str): Name of variant
        owner (Union[str, UserRegistrar]): Owner
        description (str): Description of primary data to be registered
        inputs (list[Tuple(str, str)]): A list of Source NameVariant Tuples to input into the transformation. Defaults to an empty list
        tags (List[str]): Tags forwarded to the registrar with the source. Defaults to an empty list
        properties (dict): Properties forwarded to the registrar with the source. Defaults to an empty dict

    Returns:
        source (ColumnSourceRegistrar): Source
    """
    # Build fresh containers per call; a mutable default would be shared by every call.
    inputs = [] if inputs is None else inputs
    tags = [] if tags is None else tags
    properties = {} if properties is None else properties
    return self.__registrar.df_transformation(name=name,
                                              variant=variant,
                                              owner=owner,
                                              provider=self.name(),
                                              description=description,
                                              inputs=inputs,
                                              tags=tags,
                                              properties=properties)

register_file(name, description, path, variant='default', owner='', tags=[], properties={})

Register a local file.

Examples:

transactions = local.register_file(
    name="transactions",
    variant="quickstart",
    description="A dataset of fraudulent transactions",
    path="transactions.csv"
)

Parameters:

Name Type Description Default
name str

Name for how to reference the file later

required
description str

Description of the file

required
path str

Path to the file

required
variant str

File variant

'default'
owner str

Owner of the file

''

Returns:

Name Type Description
source LocalSource

source

Source code in src/featureform/register.py
def register_file(self, name, description, path, variant="default", owner="", tags: List[str] = None, properties: dict = None):
    """Register a local file.

    **Examples**:
    ```
    transactions = local.register_file(
        name="transactions",
        variant="quickstart",
        description="A dataset of fraudulent transactions",
        path="transactions.csv"
    )
    ```
    Args:
        name (str): Name for how to reference the file later
        description (str): Description of the file
        path (str): Path to the file
        variant (str): File variant
        owner (str): Owner of the file. When left empty, the registrar's default owner is used
        tags (List[str]): Tags forwarded to the registrar with the source. Defaults to an empty list
        properties (dict): Properties forwarded to the registrar with the source. Defaults to an empty dict

    Returns:
        source (LocalSource): source
    """
    if owner == "":
        owner = self.__registrar.must_get_default_owner()
    # Build fresh containers per call; a mutable default would be shared by every call.
    tags = [] if tags is None else tags
    properties = {} if properties is None else properties
    # Store the file as a source
    self.__registrar.register_primary_data(name=name,
                                           variant=variant,
                                           location=SQLTable(path),
                                           provider=self.__provider.name,
                                           owner=owner,
                                           description=description,
                                           tags=tags,
                                           properties=properties)
    return LocalSource(self.__registrar, name, owner, variant, self.name(), path, description)

sql_transformation(variant='default', owner='', name='', description='', tags=[], properties={})

Register a SQL transformation source. The local.sql_transformation decorator takes the returned string in the following function and executes it as a SQL Query.

The name of the function is the name of the resulting source.

Sources for the transformation can be specified by adding the Name and Variant in brackets '{{ name.variant }}'. The correct source is substituted when the query is run.

Examples:

@local.sql_transformation(variant="quickstart")
def average_user_transaction():
    # The {{ name.variant }} placeholder is substituted with the source when the query is run.
    query = (
        "SELECT CustomerID as user_id, avg(TransactionAmount) as avg_transaction_amt from"
        " {{transactions.v1}} GROUP BY user_id"
    )
    return query

Parameters:

Name Type Description Default
name str

Name of source

''
variant str

Name of variant

'default'
owner Union[str, UserRegistrar]

Owner

''
description str

Description of primary data to be registered

''

Returns:

Name Type Description
source ColumnSourceRegistrar

Source

Source code in src/featureform/register.py
def sql_transformation(self,
                       variant: str = "default",
                       owner: Union[str, UserRegistrar] = "",
                       name: str = "",
                       description: str = "",
                       tags: List[str] = None,
                       properties: dict = None):
    """
    Register a SQL transformation source. The local.sql_transformation decorator takes the returned string in the
    following function and executes it as a SQL Query.

    The name of the function is the name of the resulting source.

    Sources for the transformation can be specified by adding the Name and Variant in brackets '{{ name.variant }}'.
    The correct source is substituted when the query is run.

    **Examples**:
    ``` py
    @local.sql_transformation(variant="quickstart")
    def average_user_transaction():
        return "SELECT CustomerID as user_id, avg(TransactionAmount) as avg_transaction_amt from" \
        " {{transactions.v1}} GROUP BY user_id"
    ```

    Args:
        name (str): Name of source
        variant (str): Name of variant
        owner (Union[str, UserRegistrar]): Owner
        description (str): Description of primary data to be registered
        tags (List[str]): Tags forwarded to the registrar with the source. Defaults to an empty list
        properties (dict): Properties forwarded to the registrar with the source. Defaults to an empty dict

    Returns:
        source (ColumnSourceRegistrar): Source
    """
    # Build fresh containers per call; a mutable default would be shared by every call.
    tags = [] if tags is None else tags
    properties = {} if properties is None else properties
    return self.__registrar.sql_transformation(name=name,
                                               variant=variant,
                                               owner=owner,
                                               provider=self.name(),
                                               description=description,
                                               tags=tags,
                                               properties=properties)

src.featureform.register.LocalSource

LocalSource creates a reference to a source that can be accessed locally.

Source code in src/featureform/register.py
class LocalSource:
    """
    LocalSource creates a reference to a source that can be accessed locally.
    """

    def __init__(self,
                 registrar,
                 name: str,
                 owner: str,
                 variant: str,
                 provider: str,
                 path: str,
                 description: str = ""):
        # Registrar used later to register column resources for this source.
        self.registrar = registrar
        # Identity and ownership of the source.
        self.name, self.variant = name, variant
        self.owner, self.provider = owner, provider
        # Where the data lives and how it is described.
        self.path, self.description = path, description

    def __call__(self, fn: Callable[[], str]):
        """
        Use the source as a decorator on `fn`.

        An empty description is replaced with the function's docstring and an empty name with
        the function's name. The function then gains a `register_resources` attribute bound to
        this source and is returned.
        """
        if self.description == "":
            self.description = fn.__doc__
        if self.name == "":
            self.name = fn.__name__
        # NOTE(review): no `__set_query` method is defined in the class as shown here; because of
        # name mangling this looks up `_LocalSource__set_query` -- verify this call can succeed.
        self.__set_query(fn())
        setattr(fn, "register_resources", self.register_resources)
        return fn

    def __getitem__(self, columns: List[str]):
        """Return a (registrar, (name, variant), columns) tuple; fewer than two columns is an error."""
        col_len = len(columns)
        if col_len >= 2:
            return (self.registrar, self.name_variant(), columns)
        raise Exception(f"Expected 2 columns, but found {col_len}. Missing entity and/or source columns")

    def name_variant(self):
        """Return the (name, variant) pair identifying this source."""
        identifier = (self.name, self.variant)
        return identifier

    def pandas(self):
        """
        Returns the local source as a pandas dataframe.

        Returns:
            dataframe (pandas.DataFrame): The contents of the file at `path`, read as CSV
        """
        dataframe = pd.read_csv(self.path)
        return dataframe

    def register_resources(
            self,
            entity: Union[str, EntityRegistrar],
            entity_column: str,
            owner: Union[str, UserRegistrar] = "",
            inference_store: Union[str, OnlineProvider, FileStoreProvider] = "",
            features: List[ColumnMapping] = None,
            labels: List[ColumnMapping] = None,
            timestamp_column: str = "",
    ):
        """
        Registers features and/or labels that can be used in training sets or served.

        **Examples**:
        ``` py
        average_user_transaction.register_resources(
            entity=user,
            entity_column="CustomerID",
            inference_store=local,
            features=[
                {"name": <feature name>, "variant": <feature variant>, "column": <value column>, "type": "float32"}, # Column Mapping
            ],
        )
        ```

        Args:
            entity (Union[str, EntityRegistrar]): The name to reference the entity by when serving features
            entity_column (str): The name of the column in the source to be used as the entity
            owner (Union[str, UserRegistrar]): The owner of the resource(s)
            inference_store (Union[str, OnlineProvider, FileStoreProvider]): Where to store the materialized feature for serving. (Use the local provider in Localmode)
            features (List[ColumnMapping]): A list of column mappings to define the features
            labels (List[ColumnMapping]): A list of column mappings to define the labels
            timestamp_column (str): The name of an optional timestamp column in the dataset. Will be used to match the features and labels with point-in-time correctness

        Returns:
            registrar (ResourceRegister): Registrar
        """
        # The source is identified to the registrar by its (name, variant) pair.
        source_key = (self.name, self.variant)
        return self.registrar.register_column_resources(
            source=source_key,
            entity=entity,
            entity_column=entity_column,
            owner=owner,
            inference_store=inference_store,
            features=features,
            labels=labels,
            timestamp_column=timestamp_column,
            description=self.description,
        )

pandas()

Returns the local source as a pandas dataframe.

Returns: dataframe (pandas.DataFrame): A pandas DataFrame

Source code in src/featureform/register.py
def pandas(self):
    """
    Returns the local source as a pandas dataframe.

    Returns:
        dataframe (pandas.DataFrame): The contents of the file at `path`, read as CSV
    """
    dataframe = pd.read_csv(self.path)
    return dataframe

register_resources(entity, entity_column, owner='', inference_store='', features=None, labels=None, timestamp_column='')

Registers features and/or labels that can be used in training sets or served.

Examples:

average_user_transaction.register_resources(
    entity=user,
    entity_column="CustomerID",
    inference_store=local,
    features=[
        {"name": <feature name>, "variant": <feature variant>, "column": <value column>, "type": "float32"}, # Column Mapping
    ],
)

Parameters:

Name Type Description Default
entity Union[str, EntityRegistrar]

The name to reference the entity by when serving features

required
entity_column str

The name of the column in the source to be used as the entity

required
owner Union[str, UserRegistrar]

The owner of the resource(s)

''
inference_store Union[str, OnlineProvider, FileStoreProvider]

Where to store the materialized feature for serving. (Use the local provider in Localmode)

''
features List[ColumnMapping]

A list of column mappings to define the features

None
labels List[ColumnMapping]

A list of column mappings to define the labels

None
timestamp_column str

The name of an optional timestamp column in the dataset. Will be used to match the features and labels with point-in-time correctness

''

Returns:

Name Type Description
registrar ResourceRegister

Registrar

Source code in src/featureform/register.py
def register_resources(
        self,
        entity: Union[str, EntityRegistrar],
        entity_column: str,
        owner: Union[str, UserRegistrar] = "",
        inference_store: Union[str, OnlineProvider, FileStoreProvider] = "",
        features: List[ColumnMapping] = None,
        labels: List[ColumnMapping] = None,
        timestamp_column: str = "",
):
    """
    Registers features and/or labels that can be used in training sets or served.

    **Examples**:
    ``` py
    average_user_transaction.register_resources(
        entity=user,
        entity_column="CustomerID",
        inference_store=local,
        features=[
            {"name": <feature name>, "variant": <feature variant>, "column": <value column>, "type": "float32"}, # Column Mapping
        ],
    )
    ```

    Args:
        entity (Union[str, EntityRegistrar]): The name to reference the entity by when serving features
        entity_column (str): The name of the column in the source to be used as the entity
        owner (Union[str, UserRegistrar]): The owner of the resource(s)
        inference_store (Union[str, OnlineProvider, FileStoreProvider]): Where to store the materialized feature for serving. (Use the local provider in Localmode)
        features (List[ColumnMapping]): A list of column mappings to define the features
        labels (List[ColumnMapping]): A list of column mappings to define the labels
        timestamp_column (str): The name of an optional timestamp column in the dataset. Will be used to match the features and labels with point-in-time correctness

    Returns:
        registrar (ResourceRegister): Registrar
    """
    # The source is identified to the registrar by its (name, variant) pair.
    source_key = (self.name, self.variant)
    return self.registrar.register_column_resources(
        source=source_key,
        entity=entity,
        entity_column=entity_column,
        owner=owner,
        inference_store=inference_store,
        features=features,
        labels=labels,
        timestamp_column=timestamp_column,
        description=self.description,
    )