# Source code for restpose.client

# -*- coding: utf-8 -*-
#
# This file is part of the restpose python module, released under the MIT
# license.  See the COPYING file for more information.

"""
The RestPose client mirrors the resources provided by the RestPose server as
Python objects.

.. testsetup::

    from restpose import Server
    coll = Server().collection('test_coll')

"""

import six
from .resource import RestPoseResource
from .query import Query, QueryAll, QueryNone, QueryField, QueryMeta, \
                   SearchResults
import query
from .errors import RestPoseError, CheckPointExpiredError

class Server(object):
    """Representation of a RestPose server.

    Allows indexing, searching, status management, etc.

    """

    #: Resource class used to talk to the server when no explicit
    #: ``resource_class`` or ``resource_instance`` is supplied.
    _resource_class = RestPoseResource

    def __init__(self, uri='http://127.0.0.1:7777',
                 resource_class=None,
                 resource_instance=None,
                 **client_opts):
        """
        :param uri: Full URI to the top path of the server.  Trailing
               slashes are stripped before the URI is stored or used.

        :param resource_class: If specified, defines a resource class to use
               instead of the default class.  This should usually be a subclass
               of :class:`RestPoseResource`.

        :param resource_instance: If specified, defines a resource instance to
               use instead of making one with the default class (or the class
               specified by `resource_class`).  The instance is cloned, so the
               object passed in is not modified.

        :param client_opts: Parameters to use to update the existing
               client_opts in the cloned resource (if `resource_instance` is
               specified), or to pass to the resource class when creating a
               new resource (otherwise).

        """
        self.uri = uri = uri.rstrip('/')

        if resource_class is not None:
            self._resource_class = resource_class

        # Test against None explicitly (matching the resource_class check
        # above) so that a resource object which happens to evaluate as false
        # is still honoured rather than silently replaced.
        if resource_instance is not None:
            self._resource = resource_instance.clone()
            self._resource.initial['uri'] = uri
            self._resource.client_opts.update(client_opts)
        else:
            self._resource = self._resource_class(uri, **client_opts)

    @property
    def status(self):
        """Get server status.

        Performs a GET on ``/status`` (expecting a 200 response) and returns
        the decoded body: a dictionary holding the status as returned from
        the server.  See the server documentation for details.

        """
        return self._resource.get('/status').expect_status(200).json

    @property
    def collections(self):
        """Get a list of existing collections.

        Performs a GET on ``/coll`` (expecting a 200 response) and returns the
        keys of the decoded body as a list of collection names (as strings).

        """
        return list(self._resource.get('/coll').expect_status(200).json.keys())

    def collection(self, coll_name):
        """Access to a collection.

        :param coll_name: The name of the collection to access.

        :returns: a Collection object which can be used to search and modify the
                  contents of the Collection.

        .. note:: No request is performed directly by this method; a Collection
                  object is simply created which will make requests when
                  needed.  For this reason, no error will be reported at this
                  stage even if the collection does not exist, or if a
                  collection name containing invalid characters is used.

        """
        return Collection(self, coll_name)


class FieldQueryFactory(object):
    """Object for creating searches on a field.

    Attribute access (``factory.tag``) and calling (``factory('tag')``) both
    produce a :class:`FieldQuerySource` for the named field.

    """

    def __init__(self, target=None):
        """
        :param target: The target to pass to the Query objects created.

        """
        #: The target that will be used when creating Query objects.  Defaults
        #: to None.
        self.target = target

    def __call__(self, fieldname):
        """Create a FieldQuerySource for the given fieldname.

        This is mainly intended for use for fieldnames which are stored in a
        parameter, or which are reserved or invalid Python identifiers.  It is
        also the only way to reach a field called ``target`` (shadowed by the
        instance attribute of that name) or a field whose name has the
        ``__dunder__`` form (rejected by attribute access).

        """
        return FieldQuerySource(fieldname, self.target)

    def __getattr__(self, fieldname):
        """Get a FieldQuerySource for the given fieldname.

        The FieldQuerySource has various operators used to build queries.

        :raises AttributeError: for ``__dunder__`` names, and for ``target``
                when the instance attribute has not been set yet.

        """
        # __getattr__ is only consulted after normal lookup fails.  Python's
        # copy/pickle machinery probes instances for optional special methods
        # (``__deepcopy__``, ``__setstate__``, ...); answering those probes
        # with a FieldQuerySource breaks copying, so report them as missing.
        if fieldname.startswith('__') and fieldname.endswith('__'):
            raise AttributeError(fieldname)
        # If 'target' reaches this point the instance dict does not hold it
        # yet (e.g. an instance made via __new__ while being unpickled);
        # reading self.target below would then recurse without end.
        if fieldname == 'target':
            raise AttributeError(fieldname)
        return FieldQuerySource(fieldname, self.target)


class FieldQuerySource(object):
    """An object which generates queries for a specific field.

    Each method builds a ``QueryField`` or ``QueryMeta`` object carrying this
    source's field name and target; nothing is sent to the server here.

    """
    def __init__(self, fieldname, target=None):
        """
        :param fieldname: The name of the field to generate queries for.  If
               set to None, will generate queries across all fields.
        :param target: The target to generate queries pointing to.

        """
        self.fieldname = fieldname
        self.target = target

    def is_in(self, values):
        """Create a query for fields which exactly match the given values.

        A document will match if at least one of the stored values for the
        field exactly matches at least one of the given values.

        This query type is currently available only for "exact", "id" and "cat"
        field types.

        :param values: A container holding the values to search for.  As a
               special case, if a string is supplied, this is equivalent to
               supplying a container holding that string.  (This method passes
               `values` on unchanged, so that special case is not implemented
               here; it relies on whatever consumes the query.)

        :example:

            Search for documents in which the "tag" field has a value of
            "edam", "cheddar" or "leicester".

            >>> query = coll.field.tag.is_in(['edam', 'cheddar', 'leicester'])

            Search for documents in which the "tag" field has a value of
            "edam".

            >>> query = coll.field.tag.is_in('edam')

        """
        return QueryField(self.fieldname, 'is', values, target=self.target)

    # FIXME - add "is_descendant" and "is_or_is_descendant"

    def __eq__(self, value):
        """Create a query for fields which exactly match the given value.

        Matches documents in which the supplied value exactly matches the
        stored value.

        This query type is currently available only for "exact", "id" and "cat"
        field types.

        This query type may be constructed using the == operator, or the
        ``equals`` method.

        :param value: The value to search for.

        :returns: a query object (not a boolean).

        :example:

            Search for documents in which the "tag" field has a value of
            "edam".

            >>> query = coll.field.tag.equals('edam')

            Or, equivalently (but less conveniently for chained calls)

            >>> query = (coll.field.tag == 'edam')

        """
        return QueryField(self.fieldname, 'is', (value,), target=self.target)
    equals = __eq__

    # ``==`` builds a query instead of comparing, so instances cannot be used
    # meaningfully as dict keys or set members.  Python 3 already makes a
    # class that defines __eq__ unhashable; stating it explicitly gives the
    # same behaviour on Python 2 as well.
    __hash__ = None

    def range(self, begin, end):
        """Create a query for field values in a given range.

        Matches documents in which one of the stored values in the field is in
        the specified range, including both the begin and end values.

        This type is currently available only for "double", "date" and
        "timestamp" field types.

        :param begin: The start of the range.
        :param end: The end of the range.

        :example:

            Search for documents in which the "num" field has a value in the
            range 0 to 10 (including the endpoints).

            >>> query = coll.field.num.range(0, 10)

        """
        return QueryField(self.fieldname, 'range', (begin, end),
                          target=self.target)

    def text(self, text, op="phrase", window=None):
        """Create a query for a piece of text in the field.

        This is a simple search for a matching sequence of words (subject to
        whatever processing has been performed on the field to conflate variant
        forms of words, such as stemming or word splitting for CJK text).

        :param text: The text to search for.  If empty, this query will
               match no results.
        :param op: The operator to use when searching.  One of "or", "and",
               "phrase" (ordered proximity), "near" (unordered proximity).
               Default="phrase".  If None, no operator is included in the
               query.
        :param window: Only relevant if op is "phrase" or "near". Window size
               in words within which the words in the text need to occur for a
               document to match; None=length of text. Integer or None.
               Default=None

        :example:

            Search for documents in which the "text" field contains text
            matching the phrase "Hello world".

            >>> query = coll.field.text.text("Hello world")

        """
        # Optional settings are only included when explicitly provided.
        value = dict(text=text)
        if op is not None:
            value['op'] = op
        if window is not None:
            value['window'] = window
        return QueryField(self.fieldname, 'text', value, target=self.target)

    def parse(self, text, op="and"):
        """Parse a structured query, searching the field.

        Unlike :meth:`text`, this allows various operators to be used in the
        query; for example, parentheses may be used, and operators such as
        "AND" may be used.

        .. todo:: Document the operators permitted.

        Beware that the parser is unable to make sense of some query strings
        (eg, those with mismatched parentheses).  If such a query string is
        used, an error will be returned by the server when the search is
        performed.

        :param text: Text to search for.  If empty, this query will match no
               results.
        :param op: The default operator to use when searching.  One of "or",
               "and".  Default="and".  If None, no operator is included in the
               query.

        :example:

            Search for documents in which the "text" field contains both
            "Hello" and "world", but not "big".

            >>> query = coll.field.text.parse("Hello world -big")

        """
        value = dict(text=text)
        if op is not None:
            value['op'] = op
        return QueryField(self.fieldname, 'parse', value, target=self.target)

    def exists(self):
        """Search for documents in which the field exists.

        This type may be used to search across all fields.

        :example:

            Search for documents in which the "text" field exists.

            >>> query = coll.field.text.exists()

            Search for documents in which any field exists.

            >>> query = coll.any_field.exists()

        """
        return QueryMeta('exists', (self.fieldname,), target=self.target)

    def nonempty(self):
        """Search for documents in which the field has a non-empty value.

        This type may be used to search across all fields.

        :example:

            Search for documents in which the "text" field has a non-empty
            value.

            >>> query = coll.field.text.nonempty()

            Search for documents in which any field has a non-empty value.

            >>> query = coll.any_field.nonempty()

        """
        return QueryMeta('nonempty', (self.fieldname,), target=self.target)

    def empty(self):
        """Search for documents in which the field has an empty value.

        This type may be used to search across all fields.

        :example:

            Search for documents in which the "text" field has an empty
            value.

            >>> query = coll.field.text.empty()

            Search for documents in which any field has an empty value.

            >>> query = coll.any_field.empty()

        """
        return QueryMeta('empty', (self.fieldname,), target=self.target)

    def has_error(self):
        """Search for documents in which the field produced errors when
        parsing.

        This type may be used to search across all fields.

        :example:

            Search for documents in which the "text" field had an error when
            parsing.

            >>> query = coll.field.text.has_error()

            Search for documents in which any field had an error when parsing.

            >>> query = coll.any_field.has_error()

        """
        return QueryMeta('error', (self.fieldname,), target=self.target)


# Module-level query builders with no target attached (target is None).
# Field produces per-field query sources; AnyField queries across all fields.
Field = FieldQueryFactory(target=None)
AnyField = FieldQuerySource(None)

class QueryTarget(object):
    """An object which can be used to make and run queries.

    :meth:`search` reads ``self._resource`` and ``self._basepath``, neither of
    which is set by this class; subclasses are expected to provide them.

    """
    def __init__(self):
        #: Factory for field-specific queries.
        self.field = FieldQueryFactory(target=self)

        #: Pseudo field for making queries across all fields.
        self.any_field = FieldQuerySource(fieldname=None, target=self)

    def all(self):
        """Create a query which matches all documents."""
        return QueryAll(target=self)

    def none(self):
        """Create a query which matches no documents."""
        return QueryNone(target=self)

    def find(self, q):
        """Apply a Query to this QueryTarget.

        :param q: A Query object which will have the target applied to it.

        :returns: whatever ``q.set_target(self)`` returns.

        """
        return q.set_target(self)

    def search(self, search):
        """Perform a search.

        :param search: is a search structure to be sent to the server, or a
                       Search or Query object.  Objects providing a
                       ``_build_search`` method are converted by calling it;
                       anything else is sent as the request body unchanged.

        :returns: a SearchResults object wrapping the decoded response.

        """
        if hasattr(search, '_build_search'):
            body = search._build_search()
        else:
            body = search
        # NOTE(review): unlike the other requests in this module, the response
        # status is not checked with expect_status() here - confirm whether
        # that is intentional.
        result = self._resource.post(self._basepath + "/search",
                                     payload=body).json
        return SearchResults(result)


class Document(object):
    """A document stored on the server, fetched lazily.

    No request is made when the object is created.  The first access to
    :attr:`data`, :attr:`terms` or :attr:`values` performs a single GET, and
    the result is kept for subsequent accesses.

    """
    def __init__(self, collection, doc_type, doc_id):
        """
        :param collection: The Collection holding the document, or None if
               `doc_type` is a DocumentType object.
        :param doc_type: A DocumentType object (if `collection` is None), or
               the name of the document type as a string.
        :param doc_id: The ID of the document.

        """
        # Paths are built with %s formatting, as in add_doc / delete_doc, so
        # that non-string IDs (eg, integers) are accepted here too.
        if collection is None:
            # doc_type should be a DocumentType object.
            self._resource = doc_type._resource
            self._path = '%s/id/%s' % (doc_type._basepath, doc_id)
        else:
            # doc_type should be a string.
            self._resource = collection._resource
            self._path = '%s/type/%s/id/%s' % (collection._basepath,
                                               doc_type, doc_id)
        self._data = None
        self._terms = None
        self._values = None
        # Decoded response body; None until the document has been fetched.
        self._raw = None

    def _fetch(self):
        """Fetch the document from the server and store its parts.

        Sections missing from the response are stored as empty dicts.

        """
        self._raw = self._resource.get(self._path).expect_status(200).json
        self._data = self._raw.get('data', {})
        self._terms = self._raw.get('terms', {})
        self._values = self._raw.get('values', {})

    @property
    def data(self):
        """The 'data' section of the document (fetched on first access)."""
        if self._raw is None:
            self._fetch()
        return self._data

    @property
    def terms(self):
        """The 'terms' section of the document (fetched on first access)."""
        if self._raw is None:
            self._fetch()
        return self._terms

    @property
    def values(self):
        """The 'values' section of the document (fetched on first access)."""
        if self._raw is None:
            self._fetch()
        return self._values


class DocumentType(QueryTarget):
    """A single document type within a collection.

    Requests are made under ``<collection path>/type/<doc_type>`` using the
    collection's resource.

    """
    def __init__(self, collection, doc_type):
        """
        :param collection: The Collection this type belongs to.
        :param doc_type: The name of the document type, as a string.

        """
        super(DocumentType, self).__init__()
        self._basepath = collection._basepath + '/type/' + doc_type
        self._resource = collection._resource

    def add_doc(self, doc, doc_id=None):
        """Add a document of this type to the collection.

        :param doc: The document, sent as the request payload.
        :param doc_id: The ID for the document.  If None, the document is
               POSTed to the type's path; otherwise it is PUT to
               ``.../id/<doc_id>``.

        :returns: the response, after checking that its status is 202.

        """
        if doc_id is None:
            resp = self._resource.post(self._basepath, payload=doc)
        else:
            path = '%s/id/%s' % (self._basepath, doc_id)
            resp = self._resource.put(path, payload=doc)
        return resp.expect_status(202)

    def delete_doc(self, doc_id):
        """Delete a document with this type from the collection.

        :param doc_id: The ID of the document to delete.

        :returns: the response, after checking that its status is 202.

        """
        path = '%s/id/%s' % (self._basepath, doc_id)
        return self._resource.delete(path).expect_status(202)

    def get_doc(self, doc_id):
        """Get a document with this type from the collection.

        No request is made by this method itself; a Document object is
        created for the given ID.

        :param doc_id: The ID of the document.

        """
        return Document(None, self, doc_id)


class Collection(QueryTarget):
    """A collection on the server.

    Requests are made under ``/coll/<coll_name>`` using the server's resource.

    """
    def __init__(self, server, coll_name):
        """
        :param server: The Server object holding the collection.
        :param coll_name: The name of the collection, as a string.

        """
        super(Collection, self).__init__()
        self._basepath = '/coll/' + coll_name
        self._resource = server._resource

    def doc_type(self, doc_type):
        """Access to a document type within this collection.

        :param doc_type: The name of the document type.

        :returns: a DocumentType object.

        """
        return DocumentType(self, doc_type)

    @property
    def status(self):
        """The status of the collection.

        Fetched with a GET on the collection's path (expecting a 200
        response) each time the property is read.

        """
        return self._resource.get(self._basepath).expect_status(200).json

    @property
    def config(self):
        """The configuration of the collection.

        Reading performs a GET on ``.../config`` (expecting a 200 response);
        assigning performs a PUT with the new value as the payload (expecting
        a 202 response).

        """
        return self._resource.get(self._basepath + '/config') \
              .expect_status(200).json

    @config.setter
    def config(self, value):
        self._resource.put(self._basepath + '/config', payload=value) \
            .expect_status(202)

    def add_doc(self, doc, doc_type=None, doc_id=None):
        """Add a document to the collection.

        :param doc: The document, sent as the request payload.
        :param doc_type: The type of the document.  If given, ``/type/<type>``
               is appended to the request path.
        :param doc_id: The ID of the document.  If given, ``/id/<id>`` is
               appended to the request path.

        The request is a PUT only when both `doc_type` and `doc_id` are
        given; otherwise it is a POST.

        :returns: the response, after checking that its status is 202.

        """
        path = self._basepath
        if doc_type is not None:
            path += '/type/%s' % doc_type
        if doc_id is not None:
            path += '/id/%s' % doc_id

        if doc_type is not None and doc_id is not None:
            resp = self._resource.put(path, payload=doc)
        else:
            resp = self._resource.post(path, payload=doc)

        return resp.expect_status(202)

    def delete_doc(self, doc_type, doc_id):
        """Delete a document from the collection.

        :param doc_type: The type of the document to delete.
        :param doc_id: The ID of the document to delete.

        :returns: the response, after checking that its status is 202.

        """
        path = '%s/type/%s/id/%s' % (self._basepath, doc_type, doc_id)
        return self._resource.delete(path).expect_status(202)

    def get_doc(self, doc_type, doc_id):
        """Get a document from the collection.

        No request is made by this method itself; a Document object is
        created for the given type and ID.

        :param doc_type: The type of the document.
        :param doc_id: The ID of the document.

        """
        return Document(self, doc_type, doc_id)

    def checkpoint(self, commit=True):
        """Set a checkpoint on the collection.

        This creates a resource on the server which can be queried to detect
        whether indexing has reached the checkpoint yet.  All updates sent
        before the checkpoint will be processed before indexing reaches the
        checkpoint, and no updates sent after the checkpoint will be processed
        before indexing reaches the checkpoint.

        :param commit: Sent to the server as the ``commit`` request parameter:
               '1' if true, '0' otherwise.  Default=True.

        :returns: a CheckPoint object for the ``checkid`` in the response
                  (a 201 response is expected).

        """
        path = self._basepath + "/checkpoint"
        params_dict = {'commit': '1' if commit else '0'}
        response = self._resource.post(path, params_dict=params_dict)
        check_id = response.expect_status(201).json.get('checkid')
        return CheckPoint(self, check_id)

    def delete(self):
        """Delete the entire collection.

        :returns: the response, after checking that its status is 202.

        """
        return self._resource.delete(self._basepath).expect_status(202)


class CheckPoint(object):
    """A checkpoint, used to check the progress of indexing.

    """
    def __init__(self, collection, check_id):
        """
        :param collection: The Collection the checkpoint was set on.
        :param check_id: The ID of the checkpoint, as a string.

        """
        self._check_id = check_id
        self._basepath = collection._basepath + '/checkpoint/' + self._check_id
        self._resource = collection._resource

        # The raw representation of the checkpoint, as returned from the
        # request, or None if the checkpoint hasn't been reached or expired, or
        # 'expired' if the checkpoint has expired.
        self._raw = None

    @property
    def check_id(self):
        """The ID of the checkpoint.

        This is used to identify the checkpoint on the server.

        """
        return self._check_id

    def _refresh(self):
        """Contact the server, and get the status of the checkpoint.

        If the checkpoint referred to by this Checkpoint instance has
        previously been found to have been reached or expired, this doesn't
        contact the server, since the checkpoint should no longer change at
        this point.

        Raises CheckPointExpiredError if the checkpoint has expired.

        """
        if self._raw is None:
            resp = self._resource.get(self._basepath).expect_status(200).json
            if resp is None:
                # A null body is treated as meaning the checkpoint expired.
                self._raw = 'expired'
            elif resp.get('reached', False):
                self._raw = resp
            # Otherwise the checkpoint is still pending: leave _raw as None
            # so that the next call asks the server again.

        if self._raw == 'expired':
            raise CheckPointExpiredError("Checkpoint %s expired" %
                                         self.check_id)

    @property
    def reached(self):
        """Return true if the checkpoint has been reached.

        May contact the server to check the current state.

        Raises CheckPointExpiredError if the checkpoint expired before the
        state was checked.

        """
        self._refresh()
        return self._raw is not None and self._raw != 'expired'

    @property
    def errors(self):
        """Return the list of errors associated with the CheckPoint.

        Note that if there are many errors, only the first few will be
        returned.

        Returns None if the checkpoint hasn't been reached yet.

        Raises CheckPointExpiredError if the checkpoint expired before the
        state was checked.

        """
        self._refresh()
        if self._raw is None:
            # Not reached yet: there is no response to read errors from.
            return None
        return self._raw.get('errors', [])

    @property
    def total_errors(self):
        """Return the total count of errors associated with the CheckPoint.

        This may be larger than len(self.errors), if there were more errors
        than the CheckPoint is able to hold.

        Returns None if the checkpoint hasn't been reached yet.

        Raises CheckPointExpiredError if the checkpoint expired before the
        state was checked.

        """
        self._refresh()
        if self._raw is None:
            # Not reached yet: there is no response to read the count from.
            return None
        return self._raw.get('total_errors', 0)

    def wait(self, poll_interval=1):
        """Wait for the checkpoint to be reached.

        This will contact the server, and wait until the checkpoint has been
        reached.

        If the checkpoint expires (before or during the call), a
        CheckPointExpiredError will be raised.  Otherwise, this will return the
        checkpoint, so that further methods can be chained on it.

        :param poll_interval: Number of seconds to sleep between checks.
               Default=1.

        """
        import time

        while True:
            self._refresh()
            if self._raw is not None:
                return self
            # FIXME - the server doesn't currently long-poll for the
            # checkpoint, so sleep between checks to avoid using lots of CPU.
            time.sleep(poll_interval)
# (Residue from the HTML documentation page this source was extracted from:)
#
# Navigation
#
# Source code for restpose.client
#
# Project Versions
#
# Quick search
#
# Navigation