"use strict";
/*!
* Copyright 2016 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
Object.defineProperty(exports, "__esModule", { value: true });
const arrify = require("arrify");
const escapeStringRegexp = require("escape-string-regexp");
const is = require("is");
const isUtf8 = require('is-utf8');
const mutation_1 = require("./mutation");
/**
* @private
*/
class FilterError extends Error {
constructor(filter) {
super();
this.name = 'FilterError';
this.message = `Unknown filter: ${filter}.`;
}
}
exports.FilterError = FilterError;
/**
* A filter takes a row as input and produces an alternate view of the row based
* on specified rules. For example, a row filter might trim down a row to
* include just the cells from columns matching a given regular expression, or
* might return all the cells of a row but not their values. More complicated
* filters can be composed out of these components to express requests such as,
* "within every column of a particular family, give just the two most recent
* cells which are older than timestamp X."
*
* There are two broad categories of filters (true filters and transformers),
* as well as two ways to compose simple filters into more complex ones
* ({@link Filter#interleave}). They work as follows:
*
* True filters alter the input row by excluding some of its cells wholesale
* from the output row. An example of a true filter is the
* {@link Filter#value} filter, which excludes cells whose values
* don't match the specified pattern. All regex true filters use RE2 syntax
* (https://github.com/google/re2/wiki/Syntax) and are evaluated as full
* matches. An important point to keep in mind is that RE2(.) is equivalent by
* default to RE2([^\n]), meaning that it does not match newlines. When
* attempting to match an arbitrary byte, you should therefore use the escape
* sequence '\C', which may need to be further escaped as '\\C' in your client
* language.
*
* Transformers alter the input row by changing the values of some of its
* cells in the output, without excluding them completely. Currently, the only
* supported transformer is the {@link Filter#value} `strip` filter,
* which replaces every cell's value with the empty string.
*
* The total serialized size of a filter message must not
* exceed 4096 bytes, and filters may not be nested within each other to a depth
* of more than 20.
*
* Use the following table for the various examples found throughout the
* filter documentation.
*
* | Row Key | follows:gwashington | follows:jadams | follows:tjefferson |
* | ----------- |:-------------------:|:--------------:|:------------------:|
* | gwashington | | 1 | |
* | tjefferson | 1 | 1 | |
* | jadams | 1 | | 1 |
*
* @class
*/
class Filter {
constructor() {
this.filters_ = [];
}
/**
* @throws TypeError
*
* Transforms Arrays into a simple regular expression for matching multiple
* values.
*
* @param {regex|string|string[]} regex Either a plain regex, a regex in
* string form or an array of strings.
*
* @returns {string}
*
* @example
* var regexString = Filter.convertToRegExpString(['a', 'b', 'c']);
* // => '(a|b|c)'
*/
static convertToRegExpString(regex) {
if (is.regexp(regex)) {
return regex.toString().replace(/^\/|\/$/g, '');
}
if (is.array(regex)) {
return `(${regex.map(Filter.convertToRegExpString).join('|')})`;
}
if (is.string(regex)) {
return regex;
}
if (is.number(regex)) {
return regex.toString();
}
if (Buffer.isBuffer(regex)) {
const encodingToUse = isUtf8(regex) ? 'utf8' : 'binary';
const regexToEscape = regex.toString(encodingToUse);
const escapedString = escapeStringRegexp(regexToEscape);
return Buffer.from(escapedString, encodingToUse);
}
throw new TypeError("Can't convert to RegExp String from unknown type.");
}
/**
* Creates a range object. All bounds default to inclusive.
*
* @param {?object|string} start Lower bound value.
* @param {?object|string} end Upper bound value.
* @param {string} key Key used to create range value keys.
*
* @returns {object}
*
* @example
* const Bigtable = require('@google-cloud/bigtable');
* const Filter = Bigtable.Filter;
*
* var range = Filter.createRange('value1', 'value2', 'Test');
* // {
* // startTestInclusive: new Buffer('value1'),
* // endTestExclusive: new Buffer('value2')
* // }
*
* //-
* // It's also possible to pass in objects to specify inclusive/exclusive
* // bounds.
* //-
* var upperBound = {
* value: 'value3',
* inclusive: false
* };
*
* var range = Filter.createRange(upperBound, null, 'Test2');
* // => {
* // startTest2Exclusive: 'value3'
* // }
*/
static createRange(start, end, key) {
const range = {};
if (start) {
Object.assign(range, createBound('start', start, key));
}
if (end) {
Object.assign(range, createBound('end', end, key));
}
return range;
function createBound(boundName, boundData, key) {
const isInclusive = boundData.inclusive !== false;
const boundKey = boundName + key + (isInclusive ? 'Closed' : 'Open');
const bound = {};
bound[boundKey] = mutation_1.Mutation.convertToBytes(boundData.value || boundData);
return bound;
}
}
/**
* @throws FilterError
*
* Turns filters into proto friendly format.
*
* @param {object[]} filters The list of filters to be parsed.
*
* @returns {object}
*
* @example
* var filter = Filter.parse([
* {
* family: 'my-family',
* }, {
* column: 'my-column'
* }
* ]);
* // {
* // chain: {
* // filters: [
* // {
* // familyNameRegexFilter: 'my-family'
* // },
* // {
* // columnQualifierRegexFilter: 'my-column'
* // }
* // ]
* // }
* // }
*/
static parse(filters) {
const filter = new Filter();
arrify(filters).forEach(filterObj => {
const key = Object.keys(filterObj)[0];
if (!is.function(filter[key])) {
throw new FilterError(key);
}
filter[key](filterObj[key]);
});
return filter.toProto();
}
/**
* Sets passAllFilter or blockAllFilter
*
* @param {boolean} pass Whether to passAllFilter or blockAllFilter
*
* Assign true for enabling passAllFilter and false for enabling blockAllFilter
*
* @example
* //-
* // Matches all cells, regardless of input. Functionally equivalent to
* // leaving `filter` unset, but included for completeness.
* //-
* var filter = {
* all: true
* };
*
* //-
* // Does not match any cells, regardless of input. Useful for temporarily
* // disabling just part of a filter.
* //-
* var filter = {
* all: false
* };
*/
all(pass) {
const filterName = pass ? 'passAllFilter' : 'blockAllFilter';
this.set(filterName, true);
}
/**
* Matches only cells from columns whose qualifiers satisfy the given RE2
* regex.
* @param {?regex|string|object} column Matching Column to filter with
*
* Note that, since column qualifiers can contain arbitrary bytes, the '\C'
* escape sequence must be used if a true wildcard is desired. The '.'
* character will not match the new line character '\n', which may be
* present in a binary qualifier.
*
* @example
* //-
* // Using the following filter, we would retrieve the `tjefferson` and
* // `gwashington` columns.
* //-
* var filter = [
* {
* column: /[a-z]+on$/
* }
* ];
*
* //-
* // You can also provide a string (optionally containing regexp characters)
* // for simple column filters.
* //-
* var filter = [
* {
* column: 'gwashington'
* }
* ];
*
* //-
* // Or you can provide an array of strings if you wish to match against
* // multiple columns.
* //-
* var filter = [
* {
* column: [
* 'gwashington',
* 'tjefferson'
* ]
* }
* ];
*
* //-
* // If you wish to use additional column filters, consider using the following
* // syntax.
* //-
* var filter = [
* {
* column: {
* name: 'gwashington'
* }
* }
* ];
*
*
* //-
* // <h4>Column Cell Limits</h4>
* //
* // Matches only the most recent number of versions within each column. For
* // example, if the `versions` is set to 2, this filter would only match
* // columns updated at the two most recent timestamps.
* //
* // If duplicate cells are present, as is possible when using an
* // {@link Filter#interleave} filter, each copy of the cell is
* // counted separately.
* //-
* var filter = [
* {
* column: {
* cellLimit: 2
* }
* }
* ];
*
* //-
* // <h4>Column Ranges</h4>
* //
* // Specifies a contiguous range of columns within a single column family.
* // The range spans from <column_family>:<start_qualifier> to
* // <column_family>:<end_qualifier>, where both bounds can be either
* // inclusive or exclusive. By default both are inclusive.
* //
* // When the `start` bound is omitted it is interpreted as an empty string.
* // When the `end` bound is omitted it is interpreted as Infinity.
* //-
* var filter = [
* {
* column: {
* family: 'follows',
* start: 'gwashington',
* end: 'tjefferson'
* }
* }
* ];
*
* //-
* // By default, both the `start` and `end` bounds are inclusive. You can
* // override these by providing an object explicity stating whether or not it
* // is `inclusive`.
* //-
* var filter = [
* {
* column: {
* family: 'follows',
* start: {
* value: 'gwashington',
* inclusive: false
* },
* end: {
* value: 'jadams',
* inclusive: false
* }
* }
* }
* ];
*/
column(column) {
if (!is.object(column)) {
column = {
name: column,
};
}
if (column.name) {
let name = Filter.convertToRegExpString(column.name);
name = mutation_1.Mutation.convertToBytes(name);
this.set('columnQualifierRegexFilter', name);
}
if (is.number(column.cellLimit)) {
this.set('cellsPerColumnLimitFilter', column.cellLimit);
}
if (column.start || column.end) {
const range = Filter.createRange(column.start, column.end, 'Qualifier');
range.familyName = column.family;
this.set('columnRangeFilter', range);
}
}
/**
* A filter which evaluates one of two possible filters, depending on
* whether or not a `test` filter outputs any cells from the input row.
*
* IMPORTANT NOTE: The `test` filter does not execute atomically with the
* pass and fail filters, which may lead to inconsistent or unexpected
* results. Additionally, condition filters have poor performance, especially
* when filters are set for the fail condition.
*
* @param {object} condition Condition to filter.
*
* @example
* //-
* // In the following example we're creating a filter that will check if
* // `gwashington` follows `tjefferson`. If he does, we'll get all of the
* // `gwashington` data. If he does not, we'll instead return all of the
* // `tjefferson` data.
* //-
* var filter = [
* {
* condition: {
* // If `test` outputs any cells, then `pass` will be evaluated on the
* // input row. Otherwise `fail` will be evaluated.
* test: [
* {
* row: 'gwashington'
* },
* {
* family: 'follows'
* },
* {
* column: 'tjefferson'
* }
* ],
*
* // If omitted, no results will be returned in the true case.
* pass: [
* {
* row: 'gwashington'
* }
* ],
*
* // If omitted, no results will be returned in the false case.
* fail: [
* {
* row: 'tjefferson'
* }
* ]
* }
* }
* ];
*/
condition(condition) {
this.set('condition', {
predicateFilter: Filter.parse(condition.test),
trueFilter: Filter.parse(condition.pass),
falseFilter: Filter.parse(condition.fail),
});
}
/**
* Matches only cells from columns whose families satisfy the given RE2
* regex. For technical reasons, the regex must not contain the ':'
* character, even if it is not being used as a literal.
* Note that, since column families cannot contain the new line character
* '\n', it is sufficient to use '.' as a full wildcard when matching
* column family names.
*
* @param {regex} family Expression to filter family
*
* @example
* var filter = [
* {
* family: 'follows'
* }
* ];
*/
family(family) {
family = Filter.convertToRegExpString(family);
this.set('familyNameRegexFilter', family);
}
/**
* Applies several filters to the data in parallel and combines the results.
*
* @param {object} filters The elements of "filters" all process a copy of the input row, and the
* results are pooled, sorted, and combined into a single output row.
* If multiple cells are produced with the same column and timestamp,
* they will all appear in the output row in an unspecified mutual order.
* All interleaved filters are executed atomically.
*
* @example
* //-
* // In the following example, we're creating a filter that will retrieve
* // results for entries that were either created between December 17th, 2015
* // and March 22nd, 2016 or entries that have data for `follows:tjefferson`.
* //-
* var filter = [
* {
* interleave: [
* [
* {
* time: {
* start: new Date('December 17, 2015'),
* end: new Date('March 22, 2016')
* }
* }
* ],
* [
* {
* family: 'follows'
* },
* {
* column: 'tjefferson'
* }
* ]
* ]
* }
* ];
*/
interleave(filters) {
this.set('interleave', {
filters: filters.map(Filter.parse),
});
}
/**
* Applies the given label to all cells in the output row. This allows
* the client to determine which results were produced from which part of
* the filter.
*
* @param {string} label Label to determine filter point
* Values must be at most 15 characters in length, and match the RE2
* pattern [a-z0-9\\-]+
*
* Due to a technical limitation, it is not currently possible to apply
* multiple labels to a cell. As a result, a chain filter may have no more than
* one sub-filter which contains a apply label transformer. It is okay for
* an {@link Filter#interleave} to contain multiple apply label
* transformers, as they will be applied to separate copies of the input. This
* may be relaxed in the future.
*
* @example
* var filter = {
* label: 'my-label'
* };
*/
label(label) {
this.set('applyLabelTransformer', label);
}
/**
* Matches only cells from rows whose keys satisfy the given RE2 regex. In
* other words, passes through the entire row when the key matches, and
* otherwise produces an empty row.
*
* @param {?regex|string|string[]} row Row format to Filter
*
* Note that, since row keys can contain arbitrary bytes, the '\C' escape
* sequence must be used if a true wildcard is desired. The '.' character
* will not match the new line character '\n', which may be present in a
* binary key.
*
* @example
* //-
* // In the following example we'll use a regular expression to match all
* // row keys ending with the letters "on", which would then yield
* // `gwashington` and `tjefferson`.
* //-
* var filter = [
* {
* row: /[a-z]+on$/
* }
* ];
*
* //-
* // You can also provide a string (optionally containing regexp characters)
* // for simple key filters.
* //-
* var filter = [
* {
* row: 'gwashington'
* }
* ];
*
* //-
* // Or you can provide an array of strings if you wish to match against
* // multiple keys.
* //-
* var filter = [
* {
* row: [
* 'gwashington',
* 'tjefferson'
* ]
* }
* ];
*
* //-
* // If you wish to use additional row filters, consider using the following
* // syntax.
* //-
* var filter = [
* {
* row: {
* key: 'gwashington'
* }
* }
* ];
*
* //-
* // <h4>Row Samples</h4>
* //
* // Matches all cells from a row with probability p, and matches no cells
* // from the row with probability 1-p.
* //-
* var filter = [
* {
* row: {
* sample: 1
* }
* }
* ];
*
* //-
* // <h4>Row Cell Offsets</h4>
* //
* // Skips the first N cells of each row, matching all subsequent cells.
* // If duplicate cells are present, as is possible when using an
* // {@link Filter#interleave}, each copy of the cell is counted
* // separately.
* //-
* var filter = [
* {
* row: {
* cellOffset: 2
* }
* }
* ];
*
* //-
* // <h4>Row Cell Limits</h4>
* //
* // Matches only the first N cells of each row.
* // If duplicate cells are present, as is possible when using an
* // {@link Filter#interleave}, each copy of the cell is counted
* // separately.
* //-
* var filter = [
* {
* row: {
* cellLimit: 4
* }
* }
* ];
*/
row(row) {
if (!is.object(row)) {
row = {
key: row,
};
}
if (row.key) {
let key = Filter.convertToRegExpString(row.key);
key = mutation_1.Mutation.convertToBytes(key);
this.set('rowKeyRegexFilter', key);
}
if (row.sample) {
this.set('rowSampleFilter', row.sample);
}
if (is.number(row.cellOffset)) {
this.set('cellsPerRowOffsetFilter', row.cellOffset);
}
if (is.number(row.cellLimit)) {
this.set('cellsPerRowLimitFilter', row.cellLimit);
}
}
/**
* Stores a filter object.
*
* @param {string} key Filter name.
* @param {*} value Filter value.
*/
set(key, value) {
const filter = {};
filter[key] = value;
this.filters_.push(filter);
}
/**
* This filter is meant for advanced use only. Hook for introspection into the
* filter. Outputs all cells directly to the output of the read rather than to
* any parent filter.
* Despite being excluded by the qualifier filter, a copy of every cell that
* reaches the sink is present in the final result.
* As with an {@link Filter#interleave} filter, duplicate cells are
* possible, and appear in an unspecified mutual order.
*
* Cannot be used within {@link Filter#condition} filter.
*
* @param {boolean} sink
*
* @example
* //-
* // Using the following filter, a copy of every cell that reaches the sink is
* // present in the final result, despite being excluded by the qualifier
* // filter
* //-
* var filter = [
* {
* family: 'follows'
* },
* {
* interleave: [
* [
* {
* all: true
* }
* ],
* [
* {
* label: 'prezzy'
* },
* {
* sink: true
* }
* ]
* ]
* },
* {
* column: 'gwashington'
* }
* ];
*
* //-
* // As with an {@link Filter#interleave} filter, duplicate cells
* // are possible, and appear in an unspecified mutual order. In this case we
* // have a duplicates with multiple `gwashington` columns because one copy
* // passed through the {@link Filter#all} filter while the other was
* // passed through the {@link Filter#label} and sink. Note that one
* // copy has label "prezzy" while the other does not.
* //-
*/
sink(sink) {
this.set('sink', sink);
}
/**
* Matches only cells with timestamps within the given range.
*
* @param {object} time Start and End time Object
*
* @example
* var filter = [
* {
* time: {
* start: new Date('December 17, 2006 03:24:00'),
* end: new Date()
* }
* }
* ];
*/
time(time) {
const range = mutation_1.Mutation.createTimeRange(time.start, time.end);
this.set('timestampRangeFilter', range);
}
/**
* If we detect multiple filters, we'll assume it's a chain filter and the
* execution of the filters will be the order in which they were specified.
*/
toProto() {
if (!this.filters_.length) {
return null;
}
if (this.filters_.length === 1) {
return this.filters_[0];
}
return {
chain: {
filters: this.filters_,
},
};
}
/**
* Matches only cells with values that satisfy the given regular expression.
* Note that, since cell values can contain arbitrary bytes, the '\C' escape
* sequence must be used if a true wildcard is desired. The '.' character
* will not match the new line character '\n', which may be present in a
* binary value.
*
* @param {?string|string[]|object} value Value to filter cells
*
* @example
* var filter = [
* {
* value: /[0-9]/
* }
* ];
*
* //-
* // You can also provide a string (optionally containing regexp characters)
* // for value filters.
* //-
* var filter = [
* {
* value: '1'
* }
* ];
*
* //-
* // You can also provide an array of strings if you wish to match against
* // multiple values.
* //-
* var filter = [
* {
* value: ['1', '9']
* }
* ];
*
* //-
* // Or you can provide a Buffer or an array of Buffers if you wish to match
* // against specfic binary value(s).
* //-
* var userInputedFaces = [Buffer.from('.|.'), Buffer.from(':-)')];
* var filter = [
* {
* value: userInputedFaces
* }
* ];
*
* //-
* // <h4>Value Ranges</h4>
* //
* // Specifies a contigous range of values.
* //
* // When the `start` bound is omitted it is interpreted as an empty string.
* // When the `end` bound is omitted it is interpreted as Infinity.
* //-
* var filter = [
* {
* value: {
* start: '1',
* end: '9'
* }
* }
* ];
*
* //-
* // By default, both the `start` and `end` bounds are inclusive. You can
* // override these by providing an object explicity stating whether or not it
* // is `inclusive`.
* //-
* var filter = [
* {
* value: {
* start: {
* value: '1',
* inclusive: false
* },
* end: {
* value: '9',
* inclusive: false
* }
* }
* }
* ];
*
* //-
* // <h4>Strip Values</h4>
* //
* // Replaces each cell's value with an emtpy string.
* //-
* var filter = [
* {
* value: {
* strip: true
* }
* }
* ];
*/
value(value) {
if (!is.object(value)) {
value = {
value,
};
}
if (value.value) {
let valueReg = Filter.convertToRegExpString(value.value);
valueReg = mutation_1.Mutation.convertToBytes(valueReg);
this.set('valueRegexFilter', valueReg);
}
if (value.start || value.end) {
const range = Filter.createRange(value.start, value.end, 'Value');
this.set('valueRangeFilter', range);
}
if (value.strip) {
this.set('stripValueTransformer', value.strip);
}
}
}
exports.Filter = Filter;
//# sourceMappingURL=filter.js.map