(:~
: Mark Logic Search String to XML Utility
:
: Copyright 2008 Ryan Grimm
:
: Licensed under the Apache License, Version 2.0 (the "License");
: you may not use this file except in compliance with the License.
: You may obtain a copy of the License at
:
: http://www.apache.org/licenses/LICENSE-2.0
:
: Unless required by applicable law or agreed to in writing, software
: distributed under the License is distributed on an "AS IS" BASIS,
: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
: See the License for the specific language governing permissions and
: limitations under the License.
:
: @author Ryan Grimm (grimm@xqdev.com)
: @version 0.5
:
:)
module "http://marklogic.com/commons/query-xml"
declare namespace stox = "http://marklogic.com/commons/query-xml"
default function namespace = "http://www.w3.org/2003/05/xpath-functions"
(:~
: Takes a search string as the input and returns a xml document that can be
: used in constructing a cts:search()
:
: @param $fields list of fields that you would like to be parsed. For example,
: if you were google you would set the fields to something like:
: ("link", "site", "filetype", "allintitle", "allintext", "allinurl", "allinanchor")
:
: @param $operators list of operators that you would like to look for at the
: beginning of each search term. If you would like to support negation and
: thesaurus lookups you could set the operators to:
: ("-", "~")
: Note: The operators can be any character but can only be one character long.
:
: @param $modes list of modes that you would like to have parsed out. This is handy
: when you want to enable searches that use words like "OR" or "AND" or "NOT".
: The two terms that are on either side of the mode will get a mode attribute
: assinged to them. The downside is that in some cases you might not know how to
: group the modes when constructing your query.
:
: @return A xml document that simplifies constructing a query
:
:)
define function stox:searchToXml(
$search as xs:string,
$fields as xs:string*,
$operators as xs:string*,
$modes as xs:string*,
$pops as xs:string*
) as element(search)
{
stox:_collapse(
let $newsearch := string-join(
if (count(tokenize($search, '"')) > 2)
then
for $i at $count in tokenize($search, '"')
return
if ($count mod 2 = 0)
then replace($i, "\s+", "!+!")
else $i
else $search, '')
let $terms := tokenize($newsearch, "\s+")
for $term at $count in $terms
let $tokens := tokenize($term, ":")
let $rawToken := stox:_stripOps($tokens[1], $operators, $pops)
where not($term = $modes)
return
if (count($tokens) > 1)
then
if ($fields[. = $rawToken])
then { (
stox:_getMode($modes, $terms, $count)
,
stox:_getOp($tokens[1], $operators)
,
stox:_getPop($tokens[2], $pops)
,
attribute { "field" } { stox:_stripOps($tokens[1], $operators, $pops) }
,
replace(string-join($tokens[2 to count($tokens)], ":"), "!\+!", " ")
) }
else { (
stox:_getMode($modes, $terms, $count)
,
stox:_getOp($tokens[1], $operators)
,
stox:_getPop($tokens[1], $pops)
,
stox:_stripOps(replace(stox:_stripOps(string-join($tokens, ":"), $operators, $pops), "!\+!", " "), $operators, $pops) )
}
else if ($tokens[1])
then { (
stox:_getMode($modes, $terms, $count)
,
stox:_getOp($tokens[1], $operators)
,
stox:_getPop($tokens[1], $pops)
,
replace(stox:_stripOps($tokens[1], $operators, $pops), "!\+!", " ")
) }
else ()
)
}
define function stox:_getMode(
$modes as xs:string*,
$terms as xs:string*,
$index as xs:integer
) as attribute()?
{
if($terms[$index - 1] = $modes or $terms[$index + 1] = $modes)
then attribute mode {
let $prev := $terms[$index - 1]
let $next := $terms[$index + 1]
return
if($next = $modes)
then $next
else $prev
}
else ()
}
(:~
: Returns a 'op' attribute if the first character of the given term has one
: of the specified operators
:
: @param $term the search term to get the operator from
:
: @param $operators list of operators that you would like to look for in the term
:
: @return a 'op' attrubute if the first character of the given term has one of
: the specified operators
:)
define function stox:_getOp(
$term as xs:string,
$ops as xs:string*
) as attribute()*
{
let $op := substring($term, 1, 1)
return
if ($op = $ops)
then attribute op { $op }
else ()
}
define function stox:_getPop(
$term as xs:string,
$pops as xs:string*
) as attribute()*
{
let $pop := substring($term, string-length($term), 1)
return
if ($pop = $pops)
then attribute postop { $pop }
else ()
}
(:~
: Removes the leading operator from the term if it exists
:
: @param $term the search term to strip the operator from
:
: @param $operators list of operators that you would like to look for in the term
:
: @return the term with the operator removed if it exists
:)
define function stox:_stripOps(
$term as xs:string,
$ops as xs:string*,
$pops as xs:string
) as xs:string
{
let $op := substring($term, 1, 1)
let $pop := substring($term, string-length($term), 1)
let $stripedOp :=
if ($op = $ops)
then substring($term, 2)
else $term
return
if ($pop = $pops)
then substring($stripedOp, 1, string-length($stripedOp) - 1)
else $stripedOp
}
define function stox:_collapse(
$terms as element(term)*
) as element(search)
{
{
let $pos := 0
for $term in $terms
let $set := xdmp:set($pos, $pos + 1)
let $term := $terms[$pos]
return
if(exists($term/@field) and string-length($term) = 0)
then { (
$term/@*,
string($terms[$pos + 1]),
xdmp:set($pos, $pos + 1)
) }
else $terms[$pos]
}
}