Source code for dlk.process

# Copyright 2021 cstsunfu. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dlk.utils.parser import BaseConfigParser
import json
import hjson
from typing import Dict, Union, Any
from dlk.data.processors import processor_config_register, processor_register
from dlk.utils.io import open


class Processor(object):
    """Entry point that runs the data processor described by a config.

    The constructor resolves the configuration (from a dict or from a file)
    and keeps only its ``'processor'`` section; :meth:`fit` then looks up the
    processor registered under that section's ``'_name'`` and runs it.
    """

    def __init__(self, config: Union[str, Dict]):
        """Load and validate the processor configuration.

        Args:
            config: Either an already-loaded configuration dict, or a path
                that is opened with ``dlk.utils.io.open`` and parsed as hjson.

        Raises:
            AssertionError: If ``BaseConfigParser(...).parser_with_check()``
                does not yield exactly one configuration (config search is
                not supported for ``Processor``).
        """
        super().__init__()
        if not isinstance(config, dict):
            with open(config) as f:
                config = hjson.load(f, object_pairs_hook=dict)

        parsed_configs = BaseConfigParser(config).parser_with_check()
        # Explicit check instead of a bare ``assert`` so the validation is not
        # stripped when Python runs with ``-O``. AssertionError is raised on
        # purpose to keep the exception type callers may already rely on.
        if len(parsed_configs) != 1:
            raise AssertionError(
                "Currently we didn't support search for Processor, if you require this feature please create an issue to describe the reason details."
            )
        self.config = parsed_configs[0]['processor']

    def fit(self, data: Dict[str, Any], stage='train'):
        """Process the data and return the processed data.

        Args:
            data: {"train": .., 'valid': ..}
            stage: "train"/ 'predict', etc.

        Returns:
            The value returned by the registered processor's ``process(data)``.
        """
        name = self.config.get('_name')
        # Resolve the processor class first, then its config class, which is
        # the order the lookups have always been performed in.
        processor_cls = processor_register.get(name)
        processor_config_cls = processor_config_register.get(name)
        processor = processor_cls(
            stage=stage,
            config=processor_config_cls(stage=stage, config=self.config),
        )
        return processor.process(data)