Advice regarding building Python modules


#1

Hi all!

What’s the best method for building/compiling/installing Python modules? Currently, our Python plans run like so:

# Habitat build callback: compile the extension modules once per interpreter
# (Python 2 first, then Python 3), using gfortran as the Fortran compiler.
do_build() {
  local interpreter
  for interpreter in python python3; do
    "$interpreter" setup.py build_ext --fcompiler=gfortran
  done
}

# Habitat install callback: install the module for Python 2 and Python 3 into
# the package prefix, build a wheel for each interpreter, and copy the wheels
# from dist/ into the prefix so dependent plans can install them.
do_install() {
  push_runtime_env 'PYTHONPATH' "$PYTHON_SITE_PACKAGES"

  # NOTE(review): --old-and-unmanageable is presumably the setuptools switch
  # for a plain (non-egg) install -- confirm against the setuptools docs.
  python setup.py install \
    --prefix="$pkg_prefix" \
    --old-and-unmanageable
  python setup.py bdist_wheel

  python3 setup.py install \
    --prefix="$pkg_prefix" \
    --old-and-unmanageable
  python3 setup.py bdist_wheel

  # Quoted so that a prefix containing whitespace or glob characters reaches
  # cp as a single destination argument.
  cp -a dist/. "$pkg_prefix"
}

Then, in any plan we want to use those wheels in:

# Habitat prepare callback for a plan that consumes the numpy wheels: make
# sure the wheel tooling is present for both interpreters, then install the
# interpreter-specific numpy wheel shipped inside the kepler/numpy package.
do_prepare() {
  pip install wheel
  pip3 install wheel

  # The whole path is quoted so that the directory returned by pkg_path_for
  # is never word-split or glob-expanded before reaching pip.
  pip install "$(pkg_path_for kepler/numpy)/numpy-1.14.2-cp27-cp27mu-linux_x86_64.whl"
  pip3 install "$(pkg_path_for kepler/numpy)/numpy-1.14.2-cp36-cp36m-linux_x86_64.whl"
}

Is this manner of packaging wheels and then installing them during the prepare phase of the dependent package advisable?

Thanks!


#2

@mike10010100, at my org we source a module_rigging.sh file in each of our ‘vendored’ modules’ plan.sh files (we do this to DRY up the vendored module plans):

# Plan metadata shared by every vendored Python module plan.
pkg_maintainer='smartB Engineering <dev@smartb.eu>'
pkg_lib_dirs=(lib)

# "major" holds the X.Y series and "minor" the trailing component; joined with
# a dot below they pin the exact interpreter release used at build time.
python_major_version='3.6'
python_minor_version='3'

# NOTE(review): do_setup_environment also looks up core/pcre, which is not
# listed here -- confirm it is supplied by the plan that sources this file.
pkg_build_deps=(
  core/inetutils core/curl core/gcc core/jq-static core/libffi
  "core/python/${python_major_version}.${python_minor_version}"
)

# Habitat "before" callback: delegates to update_pkg_version (a helper that
# is not defined in this snippet) and returns its exit status.
do_before() { update_pkg_version; }

# Habitat environment callback.
#   - Runtime: adds this package's site-packages directory to PYTHONPATH.
#   - Build time: adds the lib directories of gcc, libffi and pcre to
#     LD_LIBRARY_PATH, in that order.
# Returns the exit status of the last push.
do_setup_environment() {
  local dep

  push_runtime_env PYTHONPATH "${pkg_prefix}/lib/python${python_major_version}/site-packages"

  for dep in core/gcc core/libffi core/pcre; do
    push_buildtime_env LD_LIBRARY_PATH "$(pkg_path_for "$dep")/lib"
  done
}

# Habitat prepare callback: create a virtualenv rooted at the package prefix
# and activate it in the current shell, so that later callbacks use the
# virtualenv's python and pip. Returns the status of the activation.
do_prepare() {
  python -m venv "$pkg_prefix"
  . "$pkg_prefix/bin/activate"
}

# Habitat build callback: intentionally a no-op that always succeeds; the
# module is fetched and installed by pip in do_install.
do_build() {
  :
}

# Habitat install callback: pip-install the pinned release of the module,
# then import it to read back its __version__ and log it.
#
# Exports:  module_version - the version string reported by the module.
# Returns:  non-zero if pip fails or if the module cannot be imported
#           (the import doubles as a smoke test of the install).
# Note:     the import uses ${pkg_name} as the Python module name, so plans
#           whose import name differs from the package name must override
#           this callback.
do_install() {
  pip install --quiet --no-cache-dir "${pkg_name}==${pkg_version}" || return $?

  # Assign first and export afterwards: `export var=$(cmd)` always reports
  # success, which would hide a failed import.
  module_version=$(python -c "import ${pkg_name}; print(${pkg_name}.__version__)") || return $?
  export module_version

  build_line "${pkg_name} version: ${module_version}"
}

# Habitat strip callback: reduce the virtualenv to just the vendored module.
#   1. Uninstall every distribution reported by `pip freeze` except the one
#      this plan packages.
#   2. Delete pip and setuptools themselves from the prefix.
# Globals: pkg_name, pkg_version, pkg_prefix (read);
#          python_major_version (read, falls back to 3.6 when unset).
do_strip() {
  local module
  local py_series="${python_major_version:-3.6}"

  # -F: match the requirement literally (dots in the version are not regex
  #     wildcards).
  # -i: pip freeze prints the project's declared name, whose case may differ
  #     from pkg_name (e.g. PyYAML vs pyyaml); without this the package being
  #     built could be uninstalled as well.
  while IFS= read -r module; do
    [[ -n "$module" ]] || continue
    # stdin is detached so pip can never consume the list being iterated.
    pip uninstall --yes "$module" < /dev/null
  done < <(pip freeze | grep -v -i -F -- "${pkg_name}==${pkg_version}")

  # ${pkg_prefix:?} aborts instead of deleting from / when the prefix is
  # empty; the globs stay outside the quotes so they still expand.
  rm -rf -- \
    "${pkg_prefix:?}/lib/python${py_series}/site-packages/"pip* \
    "${pkg_prefix:?}/lib64/python${py_series}/site-packages/"pip* \
    "${pkg_prefix:?}/lib/python${py_series}/site-packages/"setuptools* \
    "${pkg_prefix:?}/lib64/python${py_series}/site-packages/"setuptools* \
    "${pkg_prefix:?}/bin/"pip*
}

# Habitat end callback: mark the package identity variables for export so
# that processes started afterwards (such as the notification scripts run by
# the success/failure callbacks) can read them from the environment.
do_end() {
  export pkg_origin pkg_name pkg_version pkg_release
}

# Habitat success callback: run the shared build-completed notification
# script, located two directories above the plan context, and return its
# exit status.
do_after_success() {
  # Quoted so a plan context path containing whitespace still resolves to a
  # single command word.
  "${PLAN_CONTEXT}/../../functions/notify_build_completed.sh"
}

# Habitat failure callback: run the shared build-failed notification script,
# located two directories above the plan context, and return its exit status.
do_after_failure() {
  # Quoted so a plan context path containing whitespace still resolves to a
  # single command word.
  "${PLAN_CONTEXT}/../../functions/notify_build_failed.sh"
}

This allows us to populate our PYTHONPATH automatically as we depend on vendored modules. This has worked well for us, and we can still override callback definitions that originated in the module_rigging.sh file by re-defining them in the main plan.sh. The following example was pulled from https://bldr.habitat.sh/#/pkgs/smartb/tensorflow/latest:

# Package identity.
pkg_origin='smartb'
pkg_name='tensorflow'
pkg_version='1.5.0'

# Descriptive metadata.
pkg_description='Let the tensors flow'
pkg_upstream_url='http://tensorflow.org/'
pkg_maintainer='smartB Engineering <dev@smartb.eu>'
pkg_license=("Apache-2.0")

# Dependencies: python at runtime, gcc-libs at build time.
pkg_deps=(core/python)
pkg_build_deps=(core/gcc-libs)

# Directories inside the package prefix that hold binaries and libraries.
pkg_bin_dirs=(bin)
pkg_lib_dirs=(lib)

# Habitat environment callback for the tensorflow plan.
#   - Flags LD_LIBRARY_PATH as an "aggregate" variable (presumably so Habitat
#     concatenates pushed values rather than replacing them -- confirm).
#   - Runtime: adds the gcc-libs lib directory to LD_LIBRARY_PATH.
#   - Runtime: adds this package's site-packages directory to PYTHONPATH.
do_setup_environment() {
  HAB_ENV_LD_LIBRARY_PATH_TYPE=aggregate

  push_runtime_env LD_LIBRARY_PATH "$(pkg_path_for core/gcc-libs)/lib"
  push_runtime_env PYTHONPATH "$pkg_prefix/lib/python3.6/site-packages"
}

# Habitat prepare callback: build a virtualenv directly inside the package
# prefix and activate it for the remaining callbacks.
do_prepare() {
  python -m venv "$pkg_prefix"
  . "$pkg_prefix/bin/activate"
}

# Habitat build callback: nothing to compile, so this always succeeds; the
# package is installed by pip in do_install.
do_build() {
  :
}

# Habitat install callback: pip-install the pinned tensorflow release into
# the active virtualenv.
do_install() {
  local requirement="${pkg_name}==${pkg_version}"

  # PYTHONPATH is cleared first -- presumably so pip only sees the
  # virtualenv's own site-packages while resolving (confirm).
  unset PYTHONPATH
  pip install "$requirement"
}

# Habitat strip callback: overridden with a no-op that always succeeds, so
# nothing is removed from the installed virtualenv.
do_strip() {
  :
}

# Habitat end callback: smoke-test the build by importing tensorflow and
# logging the version it reports.
# Returns: non-zero (python's exit status) if the import fails.
do_end() {
  local tf_version

  # Import once and keep both the output and the exit status; importing
  # tensorflow is slow, and a command substitution used directly as an
  # argument would hide a failure.
  tf_version=$(python -c 'import tensorflow; print(tensorflow.__version__)') || return $?

  build_line "Tensorflow version: ${tf_version}"
}

#3

I had a quick question surrounding your solution: are you passing around the virtualenv as the output to your package? If so, are you then sourcing it with every new package?

Thanks again!


#4

Hi @bixu, any update on my question? Currently, we’re passing around venv folders as a result of the build, but I’m wondering if this is unnecessary.


#5

When we

push_runtime_env PYTHONPATH "${pkg_prefix}/lib/python${python_major_version}/site-packages"

we are propagating the location of the modules we installed with virtualenv to any packages that depend on our Python module package. So we get the files into place with virtualenv but don’t use it after that, because Habitat’s aggregation of paths into a single concatenated PYTHONPATH does the same thing that source ./bin/activate would.


#6

@bixu Ahhhh that makes a lot more sense! Thank you!


#7

Very happy to help! Python magic in Habitat is less magical than one might hope, I think partly because workflow patterns in the Python community are less well-defined than for languages like Ruby. For example, every Rubyist uses Bundler, but Python has Virtualenv, Pyenv and others. For us at smartB this has meant digging into Python ecosystem internals a bit, which is why we built some of the stuff I pointed you to.