Source code for fitting.fit_single

import numpy as np

from esr.fitting.test_all import optimise_fun
from esr.fitting.test_all_Fisher import convert_params

import esr.generation.generator as generator
import esr.generation.simplifier as simplifier


def single_function(labels, basis_functions, likelihood, pmin=0, pmax=5, tmax=5, try_integration=False, verbose=False, Niter=30, Nconv=5, log_opt=False, return_params=False):
    """Fit a single function, given by its tree node labels, and score it.

    The labels are turned back into an expression string, the parameters are
    optimised against the data held by ``likelihood`` and, unless the
    likelihood is a mean-squared-error one, the description length is built
    from the residual, parameter and functional terms.

    Args:
        :labels (list): list of strings giving node labels of tree
        :basis_functions (list): list of lists basis functions. basis_functions[0] are nullary, basis_functions[1] are unary and basis_functions[2] are binary operators
        :likelihood (fitting.likelihood object): object containing data, likelihood functions and file paths
        :pmin (float, default=0.): minimum value for each parameter to consider when generating initial guess
        :pmax (float, default=5.): maximum value for each parameter to consider when generating initial guess
        :tmax (float, default=5.): maximum time in seconds to run any one part of simplification procedure for a given function
        :try_integration (bool, default=False): when likelihood requires integral, whether to try to analytically integrate (True) or just numerically integrate (False)
        :verbose (bool, default=False): Whether to print results (True) or not (False). The function string (and the MSE notice) are printed regardless.
        :Niter (int, default=30): Maximum number of parameter optimisation iterations to attempt.
        :Nconv (int, default=5): If we find Nconv solutions for the parameters which are within a logL of 0.5 of the best, we say we have converged and stop optimising parameters
        :log_opt (bool, default=False): whether to optimise 1 and 2 parameter cases in log space
        :return_params (bool, default=False): whether to return the parameters of the maximum likelihood point

    Returns:
        :negloglike (float): the minimum value of -log(likelihood) (corresponding to the maximum likelihood)
        :DL (float): the description length of this function (``np.nan`` when ``likelihood.is_mse`` is true)
        :params (optional, list): the maximum likelihood parameters. Only returned if `return_params` is true

    """

    # (1) Rebuild the expression string from the labelled tree and sympify it
    shape = generator.labels_to_shape(labels, basis_functions)
    _, _, tree = generator.check_tree(shape)
    raw_string = generator.node_to_string(0, tree, labels)
    max_param = simplifier.get_max_param([raw_string], verbose=verbose)
    fstr_list, sym_lookup = simplifier.initial_sympify(
        [raw_string], max_param, parallel=False, verbose=verbose)
    fstr = fstr_list[0]
    # The symbolic form is looked up here but not used again in this function
    _fsym = sym_lookup[fstr]
    print(fstr)

    # (2) Optimise the parameters of this function against the data
    chi2, params = optimise_fun(
        fstr,
        likelihood,
        tmax,
        pmin,
        pmax,
        try_integration=try_integration,
        max_param=max_param,
        Niter_params=[Niter],
        Nconv_params=[Nconv],
        log_opt=log_opt,
    )

    if not likelihood.is_mse:
        # (3) Fisher-matrix step: gives the parameter codelength
        fcn, eq, integrated = likelihood.run_sympify(
            fstr, tmax=tmax, try_integration=try_integration)
        params, negloglike, _deriv, codelen = convert_params(
            fcn, eq, integrated, params, likelihood, chi2,
            max_param=max_param)
        if verbose:
            print('\ntheta_ML:', params)
            print('Residuals:', negloglike, chi2)
            print('Parameter:', codelen)

        # (4) Functional complexity of the tree
        parameter_names = [f'a{idx}' for idx in range(max_param)]
        aifeyn = generator.aifeyn_complexity(labels, parameter_names)
        if verbose:
            print('Function:', aifeyn)

        # (5) Description length is the sum of the three contributions
        DL = negloglike + codelen + aifeyn
        if verbose:
            print('\nDescription length:', DL)
    else:
        print('Not computing DL as using MSE')
        DL = np.nan
        negloglike = chi2

    outcome = (negloglike, DL)
    if return_params:
        outcome += (params,)
    return outcome
def fit_from_string(fun, basis_functions, likelihood, pmin=0, pmax=5, tmax=5, try_integration=False, verbose=False, Niter=30, Nconv=5, maxvar=20, log_opt=False, replace_floats=False, return_params=False):
    """Run end-to-end fitting of function for a single function, given as a string.

    Note that this is not guaranteed to find the optimum representation as a
    tree, so there could be a lower description-length representation of the
    function.

    Args:
        :fun (str): String representation of the function to be fitted
        :basis_functions (list): list of lists basis functions. basis_functions[0] are nullary, basis_functions[1] are unary and basis_functions[2] are binary operators
        :likelihood (fitting.likelihood object): object containing data, likelihood functions and file paths
        :pmin (float, default=0.): minimum value for each parameter to consider when generating initial guess
        :pmax (float, default=5.): maximum value for each parameter to consider when generating initial guess
        :tmax (float, default=5.): maximum time in seconds to run any one part of simplification procedure for a given function
        :try_integration (bool, default=False): when likelihood requires integral, whether to try to analytically integrate (True) or just numerically integrate (False)
        :verbose (bool, default=False): Whether to print results (True) or not (False). The final list of labels is printed regardless.
        :Niter (int, default=30): Maximum number of parameter optimisation iterations to attempt.
        :Nconv (int, default=5): If we find Nconv solutions for the parameters which are within a logL of 0.5 of the best, we say we have converged and stop optimising parameters
        :maxvar (int, default=20): The maximum number of variables which could appear in the function
        :log_opt (bool, default=False): whether to optimise 1 and 2 parameter cases in log space
        :replace_floats (bool, default=False): whether to replace any numbers found in the function with variables to optimise
        :return_params (bool, default=False): whether to return the parameters of the maximum likelihood point

    Returns:
        :negloglike (float): the minimum value of -log(likelihood) (corresponding to the maximum likelihood)
        :DL (float): the description length of this function
        :labels (list): list of strings giving node labels of tree
        :params (optional, list): the maximum likelihood parameters. Only returned if `return_params` is true

    Raises:
        AssertionError: if more than `maxvar` numbers/parameters appear in `fun`

    """

    _, nodes, _ = generator.string_to_node(fun, basis_functions, evalf=True)

    # Named binary operators become their symbols; every other label is
    # lower-cased.
    operator_symbols = {'Mul': '*', 'Add': '+', 'Div': '/', 'Sub': '-'}
    labels = [operator_symbols.get(lab, lab.lower())
              for lab in nodes.to_list(basis_functions)]

    def is_named_parameter(lab):
        # Matches labels of the form 'a<number>', e.g. 'a0'
        return lab.startswith('a') and generator.is_float(lab[1:])

    # Prepare to get parents: on a copy, rename every number or parameter to
    # a0, a1, ... so that the tree shape can be built.
    new_labels = list(labels)
    param_idx = [j for j, lab in enumerate(new_labels)
                 if generator.is_float(lab) or is_named_parameter(lab)]
    assert len(param_idx) <= maxvar, (
        f'Found {len(param_idx)} numbers/parameters but maxvar={maxvar}')
    for k, j in enumerate(param_idx):
        new_labels[j] = f'a{k}'

    # Get parent operators (the root, at index 0, has no parent)
    s = generator.labels_to_shape(new_labels, basis_functions)
    _, _, tree = generator.check_tree(s)
    parents = [None] + [labels[p.parent] for p in tree[1:]]

    # Replace floats with symbols, except floats whose parent is a pow node
    # (intended to keep exponents fixed).
    if replace_floats:
        param_idx = [
            j for j, lab in enumerate(labels)
            if (generator.is_float(lab)
                # The root's parent is None: guard it so that a function
                # consisting of a single number does not raise AttributeError.
                and (parents[j] is None or parents[j].lower() != 'pow'))
            or is_named_parameter(lab)
        ]
        for k, j in enumerate(param_idx):
            labels[j] = f'a{k}'

    print(labels)

    res = single_function(
        labels,
        basis_functions,
        likelihood,
        pmin=pmin,
        pmax=pmax,
        tmax=tmax,
        try_integration=try_integration,
        verbose=verbose,
        Niter=Niter,
        Nconv=Nconv,
        log_opt=log_opt,
        return_params=return_params
    )

    if return_params:
        return res[0], res[1], labels, res[2]
    return res[0], res[1], labels
def tree_to_aifeyn(labels, basis_functions, verbose=True):
    """Return the AIFeyn complexity term and the node count for a labelled tree.

    Args:
        :labels (list): list of strings giving node labels of tree
        :basis_functions (list): list of lists basis functions. basis_functions[0] are nullary, basis_functions[1] are unary and basis_functions[2] are binary operators
        :verbose (bool, default=True): Whether to print results (True) or not (False)

    Returns:
        :aifeyn (float): the contribution to description length from describing tree
        :complexity (int): the number of nodes in the function

    """

    # Turn the labels back into an expression string to count its parameters
    shape = generator.labels_to_shape(labels, basis_functions)
    _, _, tree = generator.check_tree(shape)
    expression = generator.node_to_string(0, tree, labels)
    n_params = simplifier.get_max_param([expression], verbose=verbose)

    # Functional complexity, given the parameter names a0, a1, ...
    parameter_names = [f'a{idx}' for idx in range(n_params)]
    aifeyn = generator.aifeyn_complexity(labels, parameter_names)
    if verbose:
        print('Function:', aifeyn)

    return aifeyn, len(labels)
def string_to_aifeyn(fun, basis_functions, maxvar=20, verbose=True, replace_floats=False):
    """Return the AIFeyn complexity term and the node count for a function string.

    The string is converted to a list of tree node labels, which is then
    passed to `tree_to_aifeyn`.

    Args:
        :fun (str): String representation of the function to be fitted
        :basis_functions (list): list of lists basis functions. basis_functions[0] are nullary, basis_functions[1] are unary and basis_functions[2] are binary operators
        :maxvar (int, default=20): The maximum number of variables which could appear in the function
        :verbose (bool, default=True): Whether to print results (True) or not (False)
        :replace_floats (bool, default=False): whether to replace any numbers found in the function with variables to optimise

    Returns:
        :aifeyn (float): the contribution to description length from describing tree
        :complexity (int): the number of nodes in the function

    Raises:
        AssertionError: if more than `maxvar` numbers/parameters appear in `fun`

    """

    _, nodes, _ = generator.string_to_node(fun, basis_functions, evalf=True)

    # Named binary operators become their symbols; every other label is
    # lower-cased.
    operator_symbols = {'Mul': '*', 'Add': '+', 'Div': '/', 'Sub': '-'}
    labels = [operator_symbols.get(lab, lab.lower())
              for lab in nodes.to_list(basis_functions)]

    def is_named_parameter(lab):
        # Matches labels of the form 'a<number>', e.g. 'a0'
        return lab.startswith('a') and generator.is_float(lab[1:])

    # Prepare to get parents: on a copy, rename every number or parameter to
    # a0, a1, ... so that the tree shape can be built.
    new_labels = list(labels)
    param_idx = [j for j, lab in enumerate(new_labels)
                 if generator.is_float(lab) or is_named_parameter(lab)]
    assert len(param_idx) <= maxvar, (
        f'Found {len(param_idx)} numbers/parameters but maxvar={maxvar}')
    for k, j in enumerate(param_idx):
        new_labels[j] = f'a{k}'

    # Get parent operators (the root, at index 0, has no parent)
    s = generator.labels_to_shape(new_labels, basis_functions)
    _, _, tree = generator.check_tree(s)
    parents = [None] + [labels[p.parent] for p in tree[1:]]

    # Replace floats with symbols, except floats whose parent is a pow node
    # (intended to keep exponents fixed).
    if replace_floats:
        param_idx = [
            j for j, lab in enumerate(labels)
            if (generator.is_float(lab)
                # The root's parent is None: guard it so that a function
                # consisting of a single number does not raise AttributeError.
                and (parents[j] is None or parents[j].lower() != 'pow'))
            or is_named_parameter(lab)
        ]
        for k, j in enumerate(param_idx):
            labels[j] = f'a{k}'

    return tree_to_aifeyn(labels, basis_functions, verbose=verbose)