Skip to main content

ranger hive 脱敏

看起来 ranger hive plugin 只需要在授权接口里返回 row filter 正则和 column mask 脱敏表达式, 真正的查询改写像是由 hive 自身完成的. 从下面的接口代码看, Hive 在编译/语义分析阶段会调用 authorizer 的 applyRowFilterAndColumnMasking(), Ranger 插件只是把表达式 set 到 HivePrivilegeObject 上返回, 之后由 Hive 自己重写 SQL——并不是 Ranger 提前改好 SQL. 这一点还可以再对照 Hive 源码确认.

原理上倒是很容易理解, 底层sql里添加row filter的where语句, 或者底层sql包裹了select 脱敏的语句, 都能够实现.


package org.apache.hadoop.hive.ql.security.authorization.plugin;

import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;

/**
* Represents the object on which privilege is being granted/revoked, and objects
* being used in queries.
*
* Check the get* function documentation for information on what value it returns based on
* the {@link HivePrivilegeObjectType}.
*
* NOTE(review): this is an excerpt — the backing fields ({@code cellValueTransformers},
* {@code rowFilterExpression}), constructors, other accessors and the
* {@code compareTo} implementation required by {@code Comparable} are declared
* elsewhere in the full class and are not visible here.
*/
@LimitedPrivate(value = { "Apache Argus (incubating)" })
@Evolving
public class HivePrivilegeObject implements Comparable<HivePrivilegeObject> {


/**
* Returns the column-masking expressions set by the authorization plugin
* (e.g. Ranger), one transformer per column — presumably aligned with
* getColumns(); Hive uses these to rewrite the query. TODO confirm ordering
* contract against the caller.
*/
public List<String> getCellValueTransformers() {
return cellValueTransformers;
}

/** Sets the per-column masking expressions; called by the authorization plugin. */
public void setCellValueTransformers(List<String> cellValueTransformers) {
this.cellValueTransformers = cellValueTransformers;
}

/**
* Returns the row-filter expression set by the authorization plugin; Hive
* applies it as an additional filter predicate when rewriting the query.
*/
public String getRowFilterExpression() {
return rowFilterExpression;
}

/** Sets the row-filter expression; called by the authorization plugin. */
public void setRowFilterExpression(String rowFilterExpression) {
this.rowFilterExpression = rowFilterExpression;
}
}

    public List<HivePrivilegeObject> applyRowFilterAndColumnMasking(HiveAuthzContext queryContext, List<HivePrivilegeObject> hiveObjs) throws SemanticException {

@Override
public List<HivePrivilegeObject> applyRowFilterAndColumnMasking(HiveAuthzContext queryContext, List<HivePrivilegeObject> hiveObjs) throws SemanticException {
    // Result contains only the objects that actually need a query rewrite:
    // i.e. those that received a row-filter expression and/or at least one
    // non-trivial column mask. Hive uses these to rewrite the query.
    List<HivePrivilegeObject> result = new ArrayList<HivePrivilegeObject>();

    if (LOG.isDebugEnabled()) {
        LOG.debug("==> applyRowFilterAndColumnMasking(" + queryContext + ", objCount=" + hiveObjs.size() + ")");
    }

    // Perf tracing is optional; tracer stays null when disabled.
    RangerPerfTracer perf = RangerPerfTracer.isPerfTraceEnabled(PERF_HIVEAUTH_REQUEST_LOG)
            ? RangerPerfTracer.getPerfTracer(PERF_HIVEAUTH_REQUEST_LOG, "RangerHiveAuthorizer.applyRowFilterAndColumnMasking()")
            : null;

    if (CollectionUtils.isNotEmpty(hiveObjs)) {
        IMetaStoreClient metaStoreClient = getMetaStoreClient();

        for (HivePrivilegeObject privObj : hiveObjs) {
            HivePrivilegeObjectType objType = privObj.getType();

            // A missing type is treated as a table/view.
            if (objType == null) {
                objType = HivePrivilegeObjectType.TABLE_OR_VIEW;
            }

            if (LOG.isDebugEnabled()) {
                LOG.debug("applyRowFilterAndColumnMasking(hiveObjType=" + objType + ")");
            }

            boolean transformed = false;

            // Row filters and column masks apply only to tables and views.
            if (objType == HivePrivilegeObjectType.TABLE_OR_VIEW) {
                String dbName  = privObj.getDbname();
                String tblName = privObj.getObjectName();

                // Row-level filter: a predicate expression looked up from
                // Ranger policies for this table and the current user.
                String filterExpr = getRowFilterExpression(queryContext, privObj, metaStoreClient);

                if (StringUtils.isNotBlank(filterExpr)) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("rowFilter(database=" + dbName + ", table=" + tblName + "): " + filterExpr);
                    }

                    privObj.setRowFilterExpression(filterExpr);
                    transformed = true;
                }

                // Column masking: build one transformer expression per column.
                // The transformer list is set even when no column is actually
                // masked (identity transformers).
                if (CollectionUtils.isNotEmpty(privObj.getColumns())) {
                    List<String> maskers = new ArrayList<String>();

                    for (String col : privObj.getColumns()) {
                        boolean masked = addCellValueTransformerAndCheckIfTransformed(queryContext, privObj, col, maskers, metaStoreClient);

                        if (LOG.isDebugEnabled()) {
                            LOG.debug("addCellValueTransformerAndCheckIfTransformed(database=" + dbName + ", table=" + tblName + ", column=" + col + "): " + masked);
                        }

                        transformed |= masked;
                    }

                    privObj.setCellValueTransformers(maskers);
                }
            }

            if (transformed) {
                result.add(privObj);
            }
        }
    }

    RangerPerfTracer.log(perf);

    if (LOG.isDebugEnabled()) {
        LOG.debug("<== applyRowFilterAndColumnMasking(" + queryContext + ", objCount=" + hiveObjs.size() + "): retCount=" + result.size());
    }

    return result;
}
...
}