ranger hive 脱敏
看起来ranger hive plugin只需要在hive的插件里设置 row filter regex正则, column mask expression 脱敏表达式, 真正的实现倒像是由hive自身完成了. 咨询了chatgpt, 说是由ranger提前修改的sql, 还得再看看.
原理上倒是很容易理解, 底层sql里添加row filter的where语句, 或者底层sql包裹了select 脱敏的语句, 都能够实现.
package org.apache.hadoop.hive.ql.security.authorization.plugin;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
/**
* Represents the object on which privilege is being granted/revoked, and objects
* being used in queries.
*
* Check the get* function documentation for information on what value it returns based on
* the {@link HivePrivilegeObjectType}.
*
*/
@LimitedPrivate(value = { "Apache Argus (incubating)" })
@Evolving
public class HivePrivilegeObject implements Comparable<HivePrivilegeObject> {
public List<String> getCellValueTransformers() {
return cellValueTransformers;
}
public void setCellValueTransformers(List<String> cellValueTransformers) {
this.cellValueTransformers = cellValueTransformers;
}
public String getRowFilterExpression() {
return rowFilterExpression;
}
public void setRowFilterExpression(String rowFilterExpression) {
this.rowFilterExpression = rowFilterExpression;
}
}
public List<HivePrivilegeObject> applyRowFilterAndColumnMasking(HiveAuthzContext queryContext, List<HivePrivilegeObject> hiveObjs) throws SemanticException {
@Override
public List<HivePrivilegeObject> applyRowFilterAndColumnMasking(HiveAuthzContext queryContext, List<HivePrivilegeObject> hiveObjs) throws SemanticException {
List<HivePrivilegeObject> ret = new ArrayList<HivePrivilegeObject>();
if(LOG.isDebugEnabled()) {
LOG.debug("==> applyRowFilterAndColumnMasking(" + queryContext + ", objCount=" + hiveObjs.size() + ")");
}
RangerPerfTracer perf = null;
if(RangerPerfTracer.isPerfTraceEnabled(PERF_HIVEAUTH_REQUEST_LOG)) {
perf = RangerPerfTracer.getPerfTracer(PERF_HIVEAUTH_REQUEST_LOG, "RangerHiveAuthorizer.applyRowFilterAndColumnMasking()");
}
if(CollectionUtils.isNotEmpty(hiveObjs)) {
IMetaStoreClient metaStoreClient = getMetaStoreClient();
for (HivePrivilegeObject hiveObj : hiveObjs) {
HivePrivilegeObjectType hiveObjType = hiveObj.getType();
if(hiveObjType == null) {
hiveObjType = HivePrivilegeObjectType.TABLE_OR_VIEW;
}
if(LOG.isDebugEnabled()) {
LOG.debug("applyRowFilterAndColumnMasking(hiveObjType=" + hiveObjType + ")");
}
boolean needToTransform = false;
if (hiveObjType == HivePrivilegeObjectType.TABLE_OR_VIEW) {
String database = hiveObj.getDbname();
String table = hiveObj.getObjectName();
String rowFilterExpr = getRowFilterExpression(queryContext, hiveObj, metaStoreClient);
if (StringUtils.isNotBlank(rowFilterExpr)) {
if(LOG.isDebugEnabled()) {
LOG.debug("rowFilter(database=" + database + ", table=" + table + "): " + rowFilterExpr);
}
hiveObj.setRowFilterExpression(rowFilterExpr);
needToTransform = true;
}
if (CollectionUtils.isNotEmpty(hiveObj.getColumns())) {
List<String> columnTransformers = new ArrayList<String>();
for (String column : hiveObj.getColumns()) {
boolean isColumnTransformed = addCellValueTransformerAndCheckIfTransformed(queryContext, hiveObj, column, columnTransformers, metaStoreClient);
if(LOG.isDebugEnabled()) {
LOG.debug("addCellValueTransformerAndCheckIfTransformed(database=" + database + ", table=" + table + ", column=" + column + "): " + isColumnTransformed);
}
needToTransform = needToTransform || isColumnTransformed;
}
hiveObj.setCellValueTransformers(columnTransformers);
}
}
if (needToTransform) {
ret.add(hiveObj);
}
}
}
RangerPerfTracer.log(perf);
if(LOG.isDebugEnabled()) {
LOG.debug("<== applyRowFilterAndColumnMasking(" + queryContext + ", objCount=" + hiveObjs.size() + "): retCount=" + ret.size());
}
return ret;
}
...
}