Source code for pyroSAR.gamma.parser

import os
import re
import subprocess as sp
from collections import Counter
from spatialist.ancillary import finder, which

from .auxil import ExamineGamma


def _python_name(arg):
    """Convert a Gamma argument name to the name used in the generated Python code."""
    name = arg.replace('-', '_')
    # 'def' is a reserved word in Python and is therefore renamed to 'drm'
    return 'drm' if name == 'def' else name


def _format_parameter_doc(par, doc, indent):
    """Format the raw documentation text of one parameter as an rst docstring entry."""
    doc_items = re.split(r'\n+\s*', doc.strip('\n'))

    # escape * characters (which are treated as special characters for bullet lists by sphinx)
    doc_items = [x.replace('*', '\\*') for x in doc_items]

    # convert all lines starting with an integer number or 'NOTE' to bullet list items
    latest = None
    for idx, item in enumerate(doc_items):
        if re.search(r'^(?:(?:-|)[-0-9]+|NOTE):', item):
            latest = idx
            # prepend '* ' and replace missing spaces after a colon: 'x:x' -> 'x: x'
            doc_items[idx] = '* ' + re.sub(r'((?:-|)[-0-9]+:)(\w+)', r'\1 \2', item)

    # sphinx expects lines after the last bullet item to be indented by two spaces if
    # they belong to the bullet item or otherwise a blank line to mark the end of the bullet list.
    # The check is against None because the first line (index 0) can be a bullet item itself.
    if latest is not None:
        if latest + 1 < len(doc_items):
            # align the remaining lines with the text of the last bullet item
            for idx in range(latest + 1, len(doc_items)):
                doc_items[idx] = '  ' + doc_items[idx]
        else:
            # the bullet item is the last line; terminate the list with a blank line
            doc_items[-1] += '\n'

    # plain concatenation instead of str.format so that curly braces in the
    # help text are not interpreted as replacement fields
    description = ('\n' + indent * 2).join(doc_items)
    return '{0}:\n{1}{2}'.format(par, indent, description)


def parse_command(command):
    """
    Parse the help text of a Gamma command to a Python function including a docstring.
    The docstring is in rst format and can thus be parsed by e.g. sphinx.
    This function is not intended to be used by itself, but rather within function :func:`parse_module`.

    Parameters
    ----------
    command: str
        the name of the gamma command

    Returns
    -------
    str
        the full Python function text

    Raises
    ------
    RuntimeError
        if the help text contains no usage description, if a parameter is listed more than once
        in the usage description or if the documentation of a parameter cannot be found
    """
    command = which(command)

    # the command is called without arguments; everything it prints to
    # stdout and stderr is treated as its help text
    proc = sp.Popen(command, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE, universal_newlines=True)
    out, err = proc.communicate()
    out += err

    # filter header command description and usage description text
    header = '\n'.join([x.strip('* ') for x in re.findall(r'[*]{3}.*[*]{3}', out)])
    header = '| ' + header.replace('\n', '\n| ')
    usage_match = re.search(r'usage:.*(?=\n)', out)
    if usage_match is None:
        # raised as RuntimeError so that parse_module can record the failure and continue
        raise RuntimeError('cannot find usage description')
    usage = usage_match.group()

    # filter required (<...>) and optional ([...]) arguments from usage description text
    arg_req = [re.sub(r'[^\w.-]*', '', x) for x in re.findall(r'[^<]*<([^>]*)>', usage)]
    arg_opt = [re.sub(r'[^\w.-]*', '', x) for x in re.findall(r'[^\[]*\[([^\]]*)\]', usage)]
    args = arg_req + arg_opt

    # fix inconsistencies in parameter naming related to case differences,
    # e.g. ISP_PAR in the usage text vs. ISP_Par in the parameter description
    for arg in args:
        for item in set(re.findall(re.escape(arg), out, re.IGNORECASE)):
            if item != arg:
                out = out.replace(item, arg)

    double = [k for k, v in Counter(args).items() if v > 1]
    if double:
        raise RuntimeError('double parameter{0}: {1}'.format('s' if len(double) > 1 else '', ', '.join(double)))

    # create the function argument string for the Python function
    # optional arguments are parametrized with '-' as default value, e.g. arg_opt='-'
    # example: "arg1, arg2, arg3='-', logpath=None"
    # logpath is joined in here so that a command without arguments still gets a valid signature
    argstr_function = ', '.join([_python_name(x) for x in arg_req]
                                + [_python_name(x) + "='-'" for x in arg_opt]
                                + ['logpath=None'])

    # create the process call argument string using the same names as the signature
    # e.g. 'arg1, arg2, arg3'
    argstr_process = ', '.join(_python_name(x) for x in args)

    # define the number of spaces to indent
    indent = ' ' * 4

    docstring_elements = ['Parameters\n----------']

    # gather the indices, which mark the documentation start of the respective parameters within
    # the raw documentation text
    starts = []
    for x in args:
        found = re.search(r'\n[ ]*{0} .*'.format(re.escape(x)), out)
        if found is None:
            raise RuntimeError('cannot find parameter {}'.format(x))
        starts.append(found.start())
    starts.append(len(out))

    # define a pattern for parsing individual parameter documentations
    pattern = r'\n[ ]*(?P<par>{0})[ ]+(?P<doc>.*)'.format('|'.join(re.escape(x) for x in args))

    for start, stop in zip(starts[:-1], starts[1:]):
        # draw a subset from the Gamma docstring containing only the doc of a single parameter
        match = re.match(pattern, out[start:stop], flags=re.DOTALL)
        if not match:
            continue
        docstring_elements.append(_format_parameter_doc(match.group('par'), match.group('doc'), indent))

    # create docstring for parameter logpath
    docstring_elements.append('logpath: str or None\n{0}a directory to write command logfiles to'.format(indent))

    # create the function definition string
    fun_def = 'def {name}({args_fun}):' \
        .format(name=os.path.basename(command).replace('-', '_'),
                args_fun=argstr_function)

    # create the complete docstring
    fun_doc = '\n{header}\n\n{doc}\n' \
        .format(header=header,
                doc='\n'.join(docstring_elements))

    # create the process call string
    fun_proc = "process(['{command}', {args_cmd}], logpath=logpath)" \
        .format(command=command,
                args_cmd=argstr_process)

    # combine the elements to a complete Python function string
    fun = '''{defn}\n"""{doc}"""\n{proc}'''.format(defn=fun_def, doc=fun_doc, proc=fun_proc)

    # indent all lines and add an extra empty line at the end
    fun = fun.replace('\n', '\n{}'.format(indent)) + '\n'

    return fun
def parse_module(bindir, outfile):
    """
    Convert every Gamma command found in a module's `bin` directory to a Python
    function via :func:`parse_command` and write all of them to one Python script.
    Commands which cannot be parsed are skipped and reported on the console.

    Parameters
    ----------
    bindir: str
        the `bin` directory of a module containing the commands
    outfile: str
        the name of the Python file to write

    Examples
    --------
    >>> import os
    >>> from pyroSAR.gamma.parser import parse_module
    >>> outname = os.path.join(os.environ['HOME'], 'isp.py')
    >>> parse_module('/cluster/GAMMA_SOFTWARE-20161207/ISP/bin', outname)
    """
    # commands which are never parsed
    skipped = ['coord_trans', 'mosaic', 'lin_comb', 'lin_comb_cpx', 'validate']

    errors = []
    # the generated script starts with the import needed by all parsed functions
    pieces = ['from pyroSAR.gamma.auxil import process\n\n\n']

    # process the commands in case-insensitive alphabetical order
    commands = sorted(finder(bindir, ['*']), key=lambda s: s.lower())
    for cmd in commands:
        name = os.path.basename(cmd)
        if name in skipped:
            continue
        try:
            parsed = parse_command(cmd)
        except RuntimeError as e:
            errors.append('{0}: {1}'.format(name, str(e)))
        else:
            pieces.append(parsed + '\n\n')

    with open(outfile, 'w') as out:
        out.write(''.join(pieces))

    if errors:
        print('the following functions could not be parsed:\n{0}\n({1} total)'.format('\n'.join(errors), len(errors)))
def autoparse():
    """
    automatic parsing of Gamma commands.
    This function will detect the Gamma installation via environment variable `GAMMA_HOME`, detect all available
    modules (e.g. ISP, DIFF) and parse all of the module's commands via function :func:`parse_module`.
    A new Python module will be created called `gammaparse`, which is stored under `$HOME/.pyrosar`.
    Upon importing the `pyroSAR.gamma` submodule, this function is run automatically and module `gammaparse`
    is imported as `api`.

    A module is only parsed if its output file does not yet exist in the target directory.
    The `__init__.py` of `gammaparse` is rewritten on every call to import all parsed modules.

    Examples
    --------
    >>> from pyroSAR.gamma.api import diff
    >>> print('create_dem_par' in dir(diff))
    True
    """
    home = ExamineGamma().home
    target = os.path.join(os.path.expanduser('~'), '.pyrosar', 'gammaparse')
    if not os.path.isdir(target):
        os.makedirs(target)

    for module in finder(home, ['[A-Z]*'], foldermode=2):
        outfile = os.path.join(target, os.path.basename(module).lower() + '.py')
        # existing files are kept; only missing modules are parsed
        if not os.path.isfile(outfile):
            print('parsing module {}'.format(os.path.basename(module)))
            parse_module(os.path.join(module, 'bin'), outfile)
            print('=' * 20)

    # raw strings avoid invalid escape sequences; the suffix pattern is anchored
    # so that only the trailing '.py' is removed from the file name
    modules = [re.sub(r'\.py$', '', os.path.basename(x))
               for x in finder(target, [r'[a-z]+\.py$'], regex=True)]
    if len(modules) > 0:
        with open(os.path.join(target, '__init__.py'), 'w') as init:
            init.write('from . import {}'.format(', '.join(modules)))