rasterio basics
Gdal/Rasterio Playground¶
Date: 11-16-2018 (Frdiay)
Caution: it's not recommended to import osgeo.gdal and rasterio in the same kernel.
from pathlib import Path
import numpy as np
rgb_path = Path('/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_2_Vegas_Roads_Sample/RGB-PanSharpen/RGB-PanSharpen_AOI_2_Vegas_img1454.tif')
vec_path = Path('/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_2_Vegas_Roads_Sample/geojson/spacenetroads/spacenetroads_AOI_2_Vegas_img1454.geojson')
buffer_path = '/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/AOI_2_Vegas_Roads_Sample/geojson/buffer/buffer_AOI_2_Vegas_img1454.geojson'
Gdal¶
from osgeo import gdal
ds = gdal.Open(str(rgb_path))
gt = ds.GetGeoTransform()
print(gt)
gt_np = np.array(gt).reshape(2,3)
print(gt_np)
ds.RasterXSize
print("upper left in CRS: ", gdal.ApplyGeoTransform(gt, 0, 0))
print("lower left in CRS: ", gdal.ApplyGeoTransform(gt, 0, ds.RasterYSize-1))
print("lower right in CRS: ", gdal.ApplyGeoTransform(gt, ds.RasterXSize-1, ds.RasterYSize-1))
print("upper right in CRS: ", gdal.ApplyGeoTransform(gt, ds.RasterXSize-1,0))
print("lower right in CRS: ", gdal.ApplyGeoTransform(gt, ds.RasterXSize, ds.RasterYSize))
ds2 = gdal.Open('./data/osgeo-workshop/geomatrix.tif')
!gdalinfo './data/osgeo-workshop/geomatrix.tif'
!gdalinfo '/home/hayley/mercator.tif'
!gdal_translate -of GIF ./data/osgeo-workshop/wellington_west.png ./wellington_west.gif
!gdalinfo -noct ./data/osgeo-workshop/wellington_west.png
!cat ./data/osgeo-workshop/wellington_west.wld
!gdal_translate -a_srs EPSG:2193 ./data/osgeo-workshop/wellington_west.png wellington_west.tif
!gdalinfo -noct wellington_west.tif
import cv2
import matplotlib.pyplot as plt
im = cv2.imread('wellington_west.tif')
plt.imshow(im)
im_png = cv2.imread('./data/osgeo-workshop/wellington_west.png')
plt.imshow(im_png)
!gdal_translate -ot Byte -of PNG -scale $rgb_path ./8bits.png
temp = cv2.imread('8bits.png')
plt.imshow(temp)
Vector data: ogr
¶
from osgeo import ogr
orginfo
-ro
: read-only-so
: summary-only- provides extra informatio such as
number of features
,spatial extent
in (xmin, ymin) – (xmax, ymax) format,coordinate system
,list of attributes
with their name,type
andwidth.precision
- provides extra informatio such as
-al
: all-layers
!ogrinfo -so -al $vec_path
With Python API, we can get the similar information.
ds = ogr.Open(str(vec_path))
print("number of layers: ", ds.GetLayerCount())
lyr = ds.GetLayerByIndex(0) #here index is 0-based unlike gdal's band indexing (1-based)
print(lyr.GetLayerDefn())
for feat in lyr:
print("Feature: ", feat.GetFID())
for i in range(feat.GetFieldCount()):
if feat.IsFieldSet(i):
print('Field is ', feat.GetFieldDefnRef(i).GetName())
print(str(feat.GetField(i)))
# pdb.set_trace()
geom = feat.GetGeometryRef()
if geom is not None:
print('Geometry: ', geom.ExportToWkt()[0:20]) # truncated geometry
!ogrinfo -so -al $buffer_path
Vector to Raster: gdal_rasterize
¶
- the vector file and the raster must be in the same coordinate system
!gdalinfo 8bits.png
!gdal_rasterize -burn 255 -l buffer_AOI_2_Vegas_img1454 -ts 1300 1300 -ot Byte $buffer_path ./buffer_raster.tif
buff_raster = cv2.imread('./buffer_raster.tif',-1)
plt.imshow(buff_raster)
IPython Widget Basics¶
TODO: Use leaflet widget to interactively toggle these images for visual comparison: add three layers. one for the original map the other for thi buffer raster and another one with the osm basemap¶
import ipywidgets as widgets
widgets.IntSlider(min=0, max=10)
from IPython.display import display
w = widgets.IntSlider(0,100)
display(w)
Using interact
function of ipywidgets
¶
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
- Basic
interact
At the basic level.interact
autogenerates UI controls for function arguments, and then calls the function with those arguments when you manipulate the controls interactively. To useinteract
, we need to define a function that we want to explore. Let's take a look at this function that has two arguments,x
, andy
.
def f(x,y): return np.cos(x) + np.sin(y)
print(f(10,10))
interact(f,x=widgets.FloatSlider(min=-10, max=10, value=0),y=fixed(10))
interact(f, x=(-10.,10.,0.1), y=fixed(0))
Using interactive
function to get the widget returned¶
from IPython.display import display
def f(a,b):
display(a+b)
return a+b
w = interactive(f, a=(-1., 5.,0.2), b=(-10., 10., 0.3))
display(w)
w.children
w.kwargs
w.result
Disabling continuous updates for long-running functions¶
def slow_function(i):
print(int(i))
print(list(x for x in range(int(i)) if
str(x)==str(x)[::-1] and
str(x**2)==str(x**2)[::-1]))
return
%%time
slow_function(1e6)
interact(slow_function, i=(1e5,1e7,1e5))
Notice this widget is continuously updated as we slide the slider. Instead, we would like the function slow_function
to be called with the value at the slider when we release the mouse. interact_manual
function provides a variant of interaction that allows you to restrict execution so it is only done on demand. A button is added to the interact controls that allows you to trigger an execution.
interact_manual(slow_function, i=(1e5, 1e7, 1e5))
Alternatively, we can input the FloatSlider object with continuous_update
argument set to False
:
interact(slow_function, widgets.FloatSlider(min=1e5, max=1e7, step=1e5, continuous_update=False))
interact(slow_function,i=FloatSlider(min=1e5, max=1e7, step=1e5, continuous_update=False));
def f(m,b):
plt.figure(2)
x = np.linspace(-10,10,num=1000)
plt.plot(x, m*x + b)
plt.ylim(-5,5)
plt.show()
interactive_plot = interactive(f, m=(-2.,2.), b=(-10.,10.,0.5))
output = interactive_plot.children[-1]
type(output)
output.layout.height = '350px'
interactive_plot
More examples of widgets¶
widgets.IntProgress(min=0, max=100, step=1,
value=10,
description='Loading: ',
bar_style='info',
orientation='horizontal')
play = widgets.Play(min=0, max=100, step=1, value=10,
description='Press play',
disabled=False)
slider = widgets.IntSlider()
widgets.jslink((play,'value'), (slider, 'value'))
display(play, slider)
widgets.HBox([play, slider])
widgets.ColorPicker(
concise=False,
description='Pick a color',
value='blue',
disalbed=False)
Output widgets and logging¶
import ipywidgets as widgets
import logging
class OutputWidgetHandler(logging.Handler):
""" Custom logging handler sending logs to an output widget """
def __init__(self, *args, **kwargs):
super(OutputWidgetHandler, self).__init__(*args, **kwargs)
layout = {
'width': '100%',
'height': '160px',
'border': '1px solid black'
}
self.out = widgets.Output(layout=layout)
def emit(self, record):
""" Overload of logging.Handler method """
formatted_record = self.format(record)
new_output = {
'name': 'stdout',
'output_type': 'stream',
'text': formatted_record+'\n'
}
self.out.outputs = (new_output, ) + self.out.outputs
def show_logs(self):
""" Show the logs """
display(self.out)
def clear_logs(self):
""" Clear the current logs """
self.out.clear_output()
logger = logging.getLogger(__name__)
handler = OutputWidgetHandler()
handler.setFormatter(logging.Formatter('%(asctime)s - [%(levelname)s] %(message)s'))
logger.addHandler(handler)
logger.setLevel(logging.INFO)
handler.show_logs()
handler.clear_logs()
logger.info('Starting program')
try:
logger.info('About to try something dangerous...')
1.0/0.0
except Exception as e:
logger.exception('An error occurred!')
Image Browser widget¶
from sklearn import datasets
digits = datasets.load_digits()
digits.images.shape
plt.imshow(digits.images[0,:,:],cmap='gray')
def browse_image(digits):
n = len(digits.images)
def view_image(i):
plt.imshow(digits.images[i], cmap=plt.cm.gray_r, interpolation='nearest')
# plt.title("Training: ", digits.target[i])
plt.show()
interact(view_image, i=(0,n-1))
return
browse_image(digits)
Image Browser¶
from io import BytesIO
from IPython.display import Image
from ipywidgets import interact, interactive, fixed
import matplotlib as mpl
from skimage import data, filters, io, img_as_float
import pdb
arr = img_as_float(data.coffee())
print(arr.min(), arr.max())
def arr2img(arr):
""" Display 2d or 3d numpy array as an image."""
if arr.ndim == 2:
format, cmap = 'png', mpl.cm.gray
elif arr.ndim == 3:
format, cmap = 'jpg', None
else:
raise ValueError("input array should be either 2 or 3 dim")
#Don't let matplotlib autoscale the color range (to the input array's min and max)
vmin, vmax = (0, 255) if arr.dtype=='uint8' else (0., 1.)
with BytesIO() as buffer:
mpl.image.imsave(buffer, arr, vmin=vmin, vmax=vmax, format=format, cmap=cmap)
out = buffer.getvalue()
return Image(out)
arr2img(data.coins())
Now let's create a simple function to edit this image. For instance, we can toggle each channel or blur the image.
def edit_img(arr, sigma=0.1, R=1.0, G=1.0, B=1.0):
""" Apply a blurring to the numpy array"""
output = filters.gaussian(arr, sigma=sigma, multichannel=True)
colors = [R, G, B]
if arr.ndim == 2:
output = G * output
elif arr.ndim > 2:
for c in range(output.ndim):
output[:,:,c] = colors[c] * output[:,:,c]
display(arr2img(output))
# return arr2img(output)
edit_img(data.text(), sigma=0., G=0.5);
Let's use ipywidgets
's interactive functionality to see the effect of these parameters
limits = (0., 1., 0.01)
interactive(edit_img, arr=fixed(arr), sigma=(0., 5., 0.5), R=limits, G=limits, B=limits)
def choose_img(name):
global img
arr = getattr(data, name)()
return arr2img(arr)
interact(choose_img, name=sorted(set(data.__all__) - {'load', 'data_dir', 'lfw_subset', 'stereo_motorcycle'}));
temp = data.page();
print(temp.ndim)
print(temp.dtype)
plt.imshow(temp,cmap=mpl.cm.gray)
temp.min(), temp.max()
def edit_image(image, sigma, R,G,B):
new_image =
Now, let's set the image path to the spacenet dataset datapath
Sample dataset path¶
DATA = Path("/home/hayley/Data_Spacenet/")
# Sample dataset
sample_dir = Path("/home/hayley/Data_Spacenet/SpaceNet_Roads_Sample/")
sample_root_dirs = [sample_dir/ city for city in ["AOI_2_Vegas_Roads_Sample",
"AOI_3_Paris_Roads_Sample",
"AOI_4_Shanghai_Roads_Sample",
"AOI_5_Khartoum_Roads_Sample"]
]
sample_rgb_dirs = [root/"RGB-PanSharpen" for root in sample_root_dirs]
sample_vec_dirs = [root/"geojons/spacenetroads" for root in sample_root_dirs]
sample_buff_vec_dirs = [root/"geojson/buffer" for root in sample_root_dirs]
def get_short_name(fname):
"""Extract a short name from pathlib.Path object to fname"""
if isinstance(fname, str):
fname = Path(fname)
return "_".join(fname.name.split("_")[1:])
sample_vegas_rgbs = [fname for fname in sample_rgb_dirs[0].iterdir() if fname.suffix == '.tif']
print(str(sample_vegas_rgbs[0]))
print(short_name(sample_vegas_rgbs[0]))
import apls_tools
apls_tools.convert_to_8Bit(
import rasterio
gmin, gmax = float('inf'), -float('inf')
for i in range(len(sample_vegas_rgbs)):
arr = rasterio.open(sample_vegas_rgbs[i]).read()
cmin, cmax = arr.min(), arr.max()
if cmin < gmin:
gmin = cmin
if cmax > gmax:
gmax = cmax
print("global min, max: ", gmin, ", ", gmax)
data = ds.read()
print(data.shape)
print(data.min(), data.max())
def get_global_min_max(fnames):
"""compute the min and max of the image arrays in the fname list"""
gmin, gmax = float('inf'), -float('inf')
for fname in fnames:
arr = rasterio.open(fname).read()
cmin, cmax = arr.min(), arr.max()
if cmin < gmin:
gmin = cmin
if cmax > gmax:
gmax = cmax
print("global min, max: ", gmin, ", ", gmax)
return gmin, gmax
get_global_min_max(sample_vegas_rgbs)
parent = sample_vegas_rgbs
out_dir = 'RGB-PanSharpen-8bits'
i=0
for in_path in sample_vegas_rgbs:
in_name = get_short_name(in_path)
out_name = "_".join([out_dir,in_name])
out_path = in_path.parent.parent /out_dir/ out_name
print("outname: ", str(outname))
apls_tools.convert_to_8Bit(str(in_path), str(out_path))
if i>10:
break
i+=1
def batch_convert_to_8Bit(in_rgb_dir, out_dir_name='RGB-PanSharpen-8bits', verbose=False):
if isinstance(in_rgb_dir, str):
in_rgb_dir = Path(in_rgb_dir)
# set output directory
out_rgb_dir = in_rgb_dir.parent / out_dir_name
print("in: ", str(in_rgb_dir))
print("out: ", str(out_rgb_dir))
if not out_rgb_dir.exists():
out_rgb_dir.mkdir()
# convert each image to 8 bit
for i,in_path in enumerate(in_rgb_dir.iterdir()):
if in_path.suffix != '.tif': continue
in_name = get_short_name(in_path)
out_name = "_".join([out_dir_name,in_name])
out_path = out_rgb_dir/ out_name
apls_tools.convert_to_8Bit(str(in_path), str(out_path))
if (verbose and i%100==0):
print("i :", i)
print("outname: ", str(out_name))
batch_convert_to_8Bit(sample_rgb_dirs[0],verbose=True)
Convert all sample rgb images to 8bits¶
for sample_rgb_dir in sample_rgb_dirs:
batch_convert_to_8Bit(sample_rgb_dir)
Now let's make sure this conversion makes sense. We can check with QGIS.
Convert all training rgb images to 8bits¶
vegas_root = Path("/home/hayley/Data_Spacenet/AOI_2_Vegas_Roads_Train/")
paris_root = Path("/home/hayley/Data_Spacenet/AOI_3_Paris_Roads_Train/")
shanghai_root = Path("/home/hayley/Data_Spacenet/AOI_4_Shanghai_Roads_Train/")
k_root = Path("/home/hayley/Data_Spacenet/AOI_5_Khartoum_Roads_Train/")
train_root_dirs = [vegas_root, paris_root, shanghai_root, k_root]
train_rgb_dirs = [root/"RGB-PanSharpen" for root in train_root_dirs]
train_vec_dirs = [root/"geojson/spacenetroads" for root in train_root_dirs]
train_buff_vec_dirs = [root/"geojson/buffer" for root in train_root_dirs]
for train_rgb_dir in train_rgb_dirs:
batch_convert_to_8Bit(train_rgb_dir, verbose=True)
Now, let's make sure the number of 8bits files are the same as in the original training rgb images
from collections import Counter, defaultdict
c = defaultdict(dict)
original_counts = dict()
converted_counts = dict()
for i,train_rgb_dir in enumerate(train_rgb_dirs):
train_rgb_8bits_dir = train_rgb_dir.parent/'RGB-PanSharpen-8bits'
original_counts[i] = Counter(f.suffix for f in train_rgb_dir.iterdir())
converted_counts[i] = Counter(f.suffix for f in train_rgb_8bits_dir.iterdir())
print("Original:")
print(original_counts)
print("8bits:")
print(converted_counts)
Perfect, so let's last double check a couple of them with QGIS to make sure the conversion is done right.
Okay, great. so now we have accomplished 16 RGB -> 8 bit RGB conversion. This will be useful for visualizing the buffer raster results. For model training, however, we will probably use the 16 bit images since it has more information.
Before moving on, I would like to check if any of the raster images contain an invalid data. First I check which values are considered invalid from the tif
file's metadata:
sample_vegas_buff_vecs = sorted(list(f for f in sample_buff_vec_dirs[0].iterdir() if f.suffix == '.geojson'))
sample_vegas_8bit_rgbs = sorted(list(f for f in (sample_rgb_dir.parent/'RGB-PanSharpen-8bits').iterdir() if f.suffix=='.tif'))
sample_vegas_16bit_rgbs = sorted(list(f for f in sample_rgb_dir.iterdir() if f.suffix=='.tif'))
ds_16 = rasterio.open(sample_vegas_16bit_rgbs[0])
ds_8 = rasterio.open(sample_vegas_8bit_rgbs[0])
print("no data values: ")
print(ds_16.nodatavals)
print(ds_8.nodatavals)
ds_16.nodata
Great, so None
incidcates invalid/missing values in the original RGB raster images.
Check if there is any invalid data in the 16bit and 8bit RGB images.¶
Now, let's see if any of the original 16bit RGB or processed 8bit RGB raster images contains None
.
from collections import defaultdict
%timeit
nones = [] # list of filenames (pathlib Path object) that contains any invalid value
zeros = {} #key=filename, value=number of zeros
for train_rgb_dir in train_rgb_dirs:
print("="*80)
print(str(train_rgb_dir.parent.name))
for i,f in enumerate(train_rgb_dir.iterdir()):
if i%99 == 0:
print(i)
if f.suffix != '.tif': continue
with rasterio.open(f, 'r') as ds:
arr = ds.read()
has_none = np.any(np.isnan(arr))
num_zeros = np.sum([True if val<1e-6 else False for val in arr.ravel()])
# pdb.set_trace()
if has_none:
nones.append(f)
if num_zeros > 0:
zeros[str(f.name)] = num_zeros
print("Number of files with any invalid value")
print(len(nones))
print("Number of files with any zero: ")
print(len(zeros))
[True if i<5 else False for i in range(10)]
Awesome, so there is no invalid value in the training dataset. How about value of 0 then? I'm inspecting this in preparation for the later process of generating road-buffer masks from buffer vector files. I'm going to check this at the same time when checking on None
values in the above.
import fiona
from rasterio.mask import mask
from rasterio.plot import reshape_as_image, reshape_as_raster
ds = rasterio.open(sample_vegas_8bit_rgbs[0])
ds.crs
arr = ds.read()
plt.imshow(reshape_as_image(arr))
with fiona.open(sample_vegas_buff_vecs[0]) as buff_file:
geoms = [feature['geometry'] for feature in buff_file]
rasterio##todo: set outside of geometry to burn as -inf or none etc
with rasterio.open(sample_vegas_8bit_rgbs[0]) as src:
# ones = np.array(
out_img, out_transform = mask(src, geoms, crop=True, )
out_img
out_meta = src.meta.copy()
out_meta.update({"driver": "GTiff",
"height": out_img.shape[1],
"width": out_img.shape[2],
"transform": out_transform})
with rasterio.open("./temp.tif", "w", **out_meta) as dest:
dest.write(out_img)
with rasterio.open(sample_vegas_8bit_rgbs[0]) as src:
src_im = reshape_as_image(src.read())
print(np.any(np.isnan(src_im)))
print(np.sum(np.any(src_im<1e-6)))
with rasterio.open('./temp.tif') as f:
mask = reshape_as_image(f.read())
print(mask.min(), mask.max())
count = Counter(mask.ravel())
print(count)
pdb.set_trace()
mask [ mask != 0] = 255
plt.imshow(mask)
def generate_buffer_vecs(road_vec_dir):
"""
Given a path to `road_vec_dir`, generate a buffer vector file for each road vec file in that directory
based on the road_type and num_lanes
Args:
- road_vec_dir (pathlib.Path): path to a directory containing spacenet road vector files (.geojson)
Returns:
- buff_vec_dir (pathlib.Path): path to a new directory containing the processed buffer vector files
It is generated under the `geojson` folder called `buffer`. This `buffer` directory contains
buffer vector files for each road vec in the input directory.
"""
#to implement
pass