2021-09-28 20:55:27 +00:00
|
|
|
:PROPERTIES:
|
|
|
|
:ID: 7b0f97f3-9037-4d05-9170-a478e97c8d1f
|
|
|
|
:END:
|
2021-09-29 21:24:49 +00:00
|
|
|
#+title: Modeling the new search DSL
|
2021-09-28 20:55:27 +00:00
|
|
|
|
|
|
|
Defining and translating the Search DSL for the [[id:11edd6c9-b976-403b-a419-b5542ddedaae][Subscriber Search Service]].
|
|
|
|
|
|
|
|
* Searches
|
|
|
|
** A search is a collection of groupings
|
|
|
|
#+begin_src python :noweb-ref search
|
|
|
|
@dataclasses.dataclass
class Search:
    """A complete search: the single root group of conditions to evaluate."""

    # Root grouping whose conditions decide which records match.
    group: Group
    # TODO: sorting : Sorting
|
2021-09-28 20:55:27 +00:00
|
|
|
#+end_src
|
|
|
|
|
2021-10-08 20:06:24 +00:00
|
|
|
#+begin_src yaml :noweb-ref search-yaml
|
|
|
|
Search:
|
|
|
|
type: object
|
|
|
|
properties:
|
|
|
|
group:
|
|
|
|
$ref: "#/components/schemas/Group"
|
|
|
|
#+end_src
|
2021-09-28 20:55:27 +00:00
|
|
|
** A grouping is a collection of conditions
|
|
|
|
#+begin_src python :noweb-ref group
|
|
|
|
class GroupType(enum.Enum):
    """Boolean connective used to combine the conditions of a group."""

    AND = 1
    # TODO: OR = 2
|
|
|
|
|
|
|
|
|
|
|
|
@dataclasses.dataclass
class Group:
    """A set of conditions combined with a single boolean connective."""

    # Connective placed between the rendered conditions.
    group_type: GroupType
    # Conditions that belong to this group, in the order they are rendered.
    conditions: typing.List[Condition]
|
|
|
|
#+end_src
|
|
|
|
|
2021-10-08 20:06:24 +00:00
|
|
|
#+begin_src yaml :noweb-ref group-yaml
|
|
|
|
Group:
|
|
|
|
type: object
|
|
|
|
properties:
|
|
|
|
group_type:
|
|
|
|
enum:
|
|
|
|
- "AND"
|
|
|
|
conditions:
|
|
|
|
type: array
|
|
|
|
items:
|
|
|
|
$ref: "#/components/schemas/Condition"
|
|
|
|
|
|
|
|
#+end_src
|
2021-09-29 21:24:49 +00:00
|
|
|
** A condition is a filter paired with the value to match
|
2021-09-28 20:55:27 +00:00
|
|
|
#+begin_src python :noweb-ref condition
|
|
|
|
@dataclasses.dataclass
class Condition:
    """A filter together with the value it is matched against."""

    # Which field is tested, and with which operator.
    filter: Filter
    # Value supplied for the comparison, always carried as a string.
    match: str
|
|
|
|
#+end_src
|
|
|
|
|
2021-10-08 20:06:24 +00:00
|
|
|
#+begin_src yaml :noweb-ref condition-yaml
|
|
|
|
Condition:
|
|
|
|
type: object
|
|
|
|
properties:
|
|
|
|
filter:
|
|
|
|
$ref: "#/components/schemas/Filter"
|
|
|
|
match:
|
|
|
|
type: string
|
|
|
|
#+end_src
|
2021-09-29 21:24:49 +00:00
|
|
|
** A filter is a boolean expression applied to a field with an optional argument
|
|
|
|
|
|
|
|
#+begin_src python :noweb-ref filter
|
|
|
|
class InputType(enum.Enum):
    """Kind of argument a filter takes; ``Nothing`` means no argument."""

    Nothing = 1
    String = 2
    Date = 3
    Tag = 4
    TagSet = 5
    Message = 6
|
|
|
|
|
|
|
|
|
|
|
|
@dataclasses.dataclass
class Filter:
    """A boolean test on one field, with the kind of argument it expects."""

    # Operator text placed between the column and the match value in SQL.
    operator: str
    # Database field the operator is applied to.
    field: Field
    # Kind of argument the filter takes.
    input_type: InputType
|
2021-09-28 20:55:27 +00:00
|
|
|
#+end_src
|
|
|
|
|
|
|
|
** A field refers to a specific database field somewhere in our system
|
|
|
|
#+begin_src python :noweb-ref field
|
|
|
|
class Database(enum.Enum):
    """Databases in which a searchable field can live."""

    AppDB = 1
    Analytics = 2
|
|
|
|
|
|
|
|
|
|
|
|
@dataclasses.dataclass
class FieldType:
    """A named field type.

    NOTE(review): nothing in the visible source references this class yet.
    """

    # Name identifying the type.
    name: str
|
|
|
|
|
|
|
|
|
|
|
|
@dataclasses.dataclass
class Field:
    """Reference to one column of one table in one of our databases."""

    # Name of the field within the search DSL.
    name: str
    # Column holding the value.
    column: str
    # Table containing the column; it is also added to the query's FROM list.
    table: str
    # Database the table belongs to.
    database: Database
|
|
|
|
#+end_src
|
|
|
|
|
2021-09-29 21:24:49 +00:00
|
|
|
** Available filters
|
|
|
|
*** Subscriber email is x
|
|
|
|
#+begin_src python :noweb-ref fields
|
|
|
|
# Subscriber email address: subscribers.email in the application database.
email = Field(
    database=Database.AppDB,
    table="subscribers",
    column="email",
    name="email",
)
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
#+begin_src python :noweb-ref filters
|
|
|
|
# "Subscriber email is x": compares the email field with one string argument.
email = Filter(
    operator="is",
    field=fields.email,
    input_type=InputType.String,
)
|
|
|
|
#+end_src
|
2021-10-08 20:06:24 +00:00
|
|
|
|
|
|
|
#+begin_src yaml :noweb-ref filters-spec
|
|
|
|
#+end_src
|
2021-09-29 21:24:49 +00:00
|
|
|
** Sample searches
|
|
|
|
|
|
|
|
*** Match subscriber email
|
|
|
|
#+begin_src python :noweb-ref searches
|
|
|
|
Search(
    group=Group(
        conditions=[
            Condition(
                match="test@example.org",
                filter=filters.email,
            ),
        ],
        group_type=GroupType.AND,
    )
)
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
* SQL Generation
|
|
|
|
|
|
|
|
#+begin_src python :noweb-ref builder
|
|
|
|
def to_sql(search: Search) -> str:
    """Render a search as a single SQL ``SELECT`` statement.

    The statement selects from ``subscribers`` plus every table referenced by
    one of the search's conditions, and filters with the search's group
    rendered as a parenthesised boolean expression.

    NOTE: tables are only listed in ``FROM``; no join predicates are
    generated.

    Args:
        search: The search to translate.

    Returns:
        The SQL text, with every match value embedded as a quoted string
        literal.
    """
    tables: typing.Set[str] = {"subscribers"} | {
        condition.filter.field.table for condition in search.group.conditions
    }

    def quote_literal(value: str) -> str:
        # Standard SQL string literal: wrap in single quotes and double any
        # embedded quote so the value cannot terminate the literal.
        # NOTE(review): bound parameters would be safer still, but they would
        # change this function's return type; dialects that treat backslash
        # as an escape character need additional handling.
        return "'" + value.replace("'", "''") + "'"

    def condition_to_sql(condition: Condition) -> str:
        field = condition.filter.field
        column = f"{field.table}.{field.column}"
        operand = quote_literal(condition.match)
        return f"{column} {condition.filter.operator} {operand}"

    def group_to_sql(group: Group) -> str:
        # Use the group that was passed in, not the enclosing search's root
        # group, so the helper is correct for any group it is given.
        is_and = group.group_type == GroupType.AND
        if not group.conditions:
            # "()" is not valid SQL; an empty conjunction is vacuously true
            # and an empty disjunction is vacuously false.
            return "(1=1)" if is_and else "(1=0)"
        operator = "AND" if is_and else "OR"
        clauses = f" {operator} ".join(
            condition_to_sql(condition) for condition in group.conditions
        )
        return f"({clauses})"

    where = group_to_sql(search.group)
    # Sort the table names: set iteration order is not stable between
    # interpreter runs, which would make the generated SQL nondeterministic.
    return f"""SELECT * FROM {', '.join(sorted(tables))} WHERE {where}"""
|
|
|
|
#+end_src
|
2021-09-28 20:55:27 +00:00
|
|
|
* Decisions
|
|
|
|
|
2021-09-29 21:24:49 +00:00
|
|
|
** DONE Should the input type presented to the end-user be tied to the database field or the conditional operator?
|
2021-09-28 20:55:27 +00:00
|
|
|
Seems it should be the operator, as an "equals" operator would match a single
|
|
|
|
value, whereas an "in" operator would match against multiple. That said, it
|
|
|
|
could be /parameterized/ by the field's type (e.g. a tag has type =str=, its
|
|
|
|
"equals" operator has type =str=, its "in" operator has type =List[str]=).
|
|
|
|
|
2021-09-29 21:24:49 +00:00
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
The input type will be defined as a property of the filter being applied.
|
|
|
|
|
2021-10-08 20:06:24 +00:00
|
|
|
** DONE Should the search service maintain a set of filters, or field types and operators?
|
2021-09-29 21:24:49 +00:00
|
|
|
- A filter is a combination of a field, an operator, and a type
|
|
|
|
- A field has a type, and operators could be defined that work with a type or set of types
|
|
|
|
|
|
|
|
For the former, the service would have total control over the search filters
|
|
|
|
available to the UI, and the UI would be coupled to the filter collection. With
|
|
|
|
the latter, the UI would have total control over which fields it's able to
|
|
|
|
search on and how, provided the fields are available.
|
2021-10-08 20:06:24 +00:00
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
The search service will maintain a set of filters.
|
2021-09-29 21:24:49 +00:00
|
|
|
** TODO How should the values of each filter be represented in the request schema?
|
|
|
|
Should they be normalized to strings, or should we allow any type and validate
|
|
|
|
it when we attempt to build the search data model? If the latter, could the
|
|
|
|
available filters be baked into the OpenAPI schema?
|
|
|
|
** TODO How should the SQL be generated for each filter?
|
|
|
|
Should a SQL template or generation function be attached to each filter?
|
|
|
|
** TODO How do we want to define the joins for the various tables that may come into play?
|
|
|
|
We'll have to know, one way or another, how to narrow the records from the
|
|
|
|
joined table. Will they all be joined by the subscriber id, or will we need to
|
|
|
|
maintain a map?
|
|
|
|
|
2021-09-28 20:55:27 +00:00
|
|
|
* Code
|
2021-10-08 20:06:24 +00:00
|
|
|
** Python
|
2021-09-28 20:55:27 +00:00
|
|
|
#+begin_src python :noweb yes :noweb-ref final :exports code :results silent
|
|
|
|
import dataclasses
|
|
|
|
import enum
|
|
|
|
import typing
|
2021-10-08 20:06:24 +00:00
|
|
|
|
|
|
|
|
2021-09-28 20:55:27 +00:00
|
|
|
<<field>>
|
2021-10-08 20:06:24 +00:00
|
|
|
|
|
|
|
|
2021-09-29 21:24:49 +00:00
|
|
|
<<filter>>
|
2021-10-08 20:06:24 +00:00
|
|
|
|
|
|
|
|
2021-09-28 20:55:27 +00:00
|
|
|
<<condition>>
|
2021-10-08 20:06:24 +00:00
|
|
|
|
|
|
|
|
2021-09-28 20:55:27 +00:00
|
|
|
<<group>>
|
2021-10-08 20:06:24 +00:00
|
|
|
|
|
|
|
|
2021-09-28 20:55:27 +00:00
|
|
|
<<search>>
|
2021-10-08 20:06:24 +00:00
|
|
|
|
|
|
|
|
2021-09-29 21:24:49 +00:00
|
|
|
<<builder>>
|
2021-10-08 20:06:24 +00:00
|
|
|
|
|
|
|
|
2021-09-29 21:24:49 +00:00
|
|
|
class fields:
|
|
|
|
<<fields>>
|
2021-10-08 20:06:24 +00:00
|
|
|
|
|
|
|
|
2021-09-29 21:24:49 +00:00
|
|
|
class filters:
|
|
|
|
<<filters>>
|
2021-10-08 20:06:24 +00:00
|
|
|
|
|
|
|
|
2021-09-29 21:24:49 +00:00
|
|
|
searches = [
|
|
|
|
<<searches>>,
|
|
|
|
]
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
#+RESULTS:
|
|
|
|
|
|
|
|
#+caption: Mypy analysis
|
|
|
|
#+begin_src bash :noweb yes :results output :exports results
|
|
|
|
mypy <(cat <<'EOF'
|
|
|
|
<<final>>
|
|
|
|
EOF) 2>&1 || true
|
2021-09-28 20:55:27 +00:00
|
|
|
#+end_src
|
2021-09-29 21:24:49 +00:00
|
|
|
|
|
|
|
#+RESULTS:
|
|
|
|
: Success: no issues found in 1 source file
|
2021-10-08 20:06:24 +00:00
|
|
|
** OpenAPI
|
2021-09-29 21:24:49 +00:00
|
|
|
* Output
|
|
|
|
#+caption: Generated queries
|
|
|
|
#+begin_src python :noweb yes :exports results
|
|
|
|
<<final>>
|
|
|
|
|
|
|
|
return [[to_sql(search)] for search in searches]
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
#+RESULTS:
|
|
|
|
| SELECT * FROM subscribers WHERE (subscribers.email is test@example.org) |
|