added WordNet synonyms extensions

2024-11-21 23:21:59 +00:00 · 2021-06-03 19:28:12 +00:00 · 2021-06-03 19:28:12 +00:00 · ed12fb5604
commit ed12fb5604
parent 876f51ab95
27 changed files with 2544 additions and 16 deletions
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -331,3 +331,4 @@ endif()
 add_subdirectory(fast_float)
 add_subdirectory(libstemmer-c-cmake)
 add_subdirectory(wordnet-blast-cmake)
--- a/contrib/boost-cmake/CMakeLists.txt
+++ b/contrib/boost-cmake/CMakeLists.txt
@ -13,11 +13,12 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
        regex
        context
        coroutine
        graph
    )
    if(Boost_INCLUDE_DIR AND Boost_FILESYSTEM_LIBRARY AND Boost_IOSTREAMS_LIBRARY AND
        Boost_PROGRAM_OPTIONS_LIBRARY AND Boost_REGEX_LIBRARY AND Boost_SYSTEM_LIBRARY AND Boost_CONTEXT_LIBRARY AND
-        Boost_COROUTINE_LIBRARY)
+        Boost_COROUTINE_LIBRARY AND Boost_GRAPH_LIBRARY)
        set(EXTERNAL_BOOST_FOUND 1)
@ -32,6 +33,7 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
        add_library (_boost_system INTERFACE)
        add_library (_boost_context INTERFACE)
        add_library (_boost_coroutine INTERFACE)
        add_library (_boost_graph INTERFACE)
        target_link_libraries (_boost_filesystem INTERFACE ${Boost_FILESYSTEM_LIBRARY})
        target_link_libraries (_boost_iostreams INTERFACE ${Boost_IOSTREAMS_LIBRARY})
@ -40,6 +42,7 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
        target_link_libraries (_boost_system INTERFACE ${Boost_SYSTEM_LIBRARY})
        target_link_libraries (_boost_context INTERFACE ${Boost_CONTEXT_LIBRARY})
        target_link_libraries (_boost_coroutine INTERFACE ${Boost_COROUTINE_LIBRARY})
        target_link_libraries (_boost_graph INTERFACE ${Boost_GRAPH_LIBRARY})
        add_library (boost::filesystem ALIAS _boost_filesystem)
        add_library (boost::iostreams ALIAS _boost_iostreams)
@ -48,6 +51,7 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
        add_library (boost::system ALIAS _boost_system)
        add_library (boost::context ALIAS _boost_context)
        add_library (boost::coroutine ALIAS _boost_coroutine)
        add_library (boost::graph ALIAS _boost_graph)
    else()
        set(EXTERNAL_BOOST_FOUND 0)
        message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system boost")
@ -221,4 +225,16 @@ if (NOT EXTERNAL_BOOST_FOUND)
    add_library (boost::coroutine ALIAS _boost_coroutine)
    target_include_directories (_boost_coroutine PRIVATE ${LIBRARY_DIR})
    target_link_libraries(_boost_coroutine PRIVATE _boost_context)
    # graph
    set (SRCS_GRAPH
        "${LIBRARY_DIR}/libs/graph/src/graphml.cpp"
        "${LIBRARY_DIR}/libs/graph/src/read_graphviz_new.cpp"
    )
    add_library (_boost_graph ${SRCS_GRAPH})
    add_library (boost::graph ALIAS _boost_graph)
    target_include_directories (_boost_graph PRIVATE ${LIBRARY_DIR})
 endif ()
--- a/contrib/wordnet-blast-cmake/CMakeLists.txt
+++ b/contrib/wordnet-blast-cmake/CMakeLists.txt
@ -0,0 +1,13 @@
 # Builds the vendored wordnet-blast sources as the `wnb` library.
 set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/wordnet-blast")
 set(SRCS
    "${LIBRARY_DIR}/wnb/core/info_helper.cc"
    "${LIBRARY_DIR}/wnb/core/load_wordnet.cc"
    "${LIBRARY_DIR}/wnb/core/wordnet.cc"
 )
 add_library(wnb ${SRCS})
 # Headers under the PUBLIC include directory (e.g. wnb/bfs.hh) include
 # Boost.Graph headers, so the Boost include path has to reach consumers.
 # The compiled boost::graph library is only needed to link wnb itself.
 target_link_libraries(wnb PUBLIC boost::headers_only PRIVATE boost::graph)
 target_include_directories(wnb PUBLIC "${LIBRARY_DIR}")
--- a/contrib/wordnet-blast/AUTHORS
+++ b/contrib/wordnet-blast/AUTHORS
@ -0,0 +1 @@
 Ugo Jardonnet ugo.jardonnet/gmail
--- a/contrib/wordnet-blast/CMakeLists.txt
+++ b/contrib/wordnet-blast/CMakeLists.txt
@ -0,0 +1,65 @@
 # Upstream stand-alone build description for wordnet-blast: builds the `wnb`
 # static library and the `wntest` driver, and defines the `dist` and `check`
 # helper targets.
 CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
 PROJECT(wnb)
 # Boost dependency
 #--------------------------------------------------
 # IF (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
 #   SET (BOOST_ROOT /Developer/boost_build/) # Suggested path
 # ELSE()
 #   SET (BOOST_ROOT "/usr/include")
 # ENDIF()
 ##############
 SET (BOOST_ROOT "${ClickHouse_SOURCE_DIR}/contrib/boost")
 ##############
 MESSAGE(STATUS "** Search Boost root: ${BOOST_ROOT}")
 FIND_PACKAGE(Boost 1.70.0 COMPONENTS graph REQUIRED)
 MESSAGE(STATUS "** Boost Include: ${Boost_INCLUDE_DIR}")
 MESSAGE(STATUS "** Boost Libraries: ${Boost_LIBRARY_DIRS}")
 MESSAGE(STATUS "** Boost Libraries: ${Boost_LIBRARIES}")
 INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIR})
 LINK_DIRECTORIES(${Boost_LIBRARY_DIRS})
 # Project
 #--------------------------------------------------
 LINK_DIRECTORIES(${wnb_SOURCE_DIR}/lib)
 INCLUDE_DIRECTORIES(${PROJECT_BINARY_DIR})
 SET(PROJECT_VERSION "0.6")
 SET(ARCHIVE_NAME ${CMAKE_PROJECT_NAME}-${PROJECT_VERSION})
 # `dist`: pack HEAD into <name>-<version>.tar.bz2 in the build directory.
 ADD_CUSTOM_TARGET(dist
  COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD
  | bzip2 > ${CMAKE_BINARY_DIR}/${ARCHIVE_NAME}.tar.bz2
  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
 # `check`: run the comparison script on the small word list.
 ADD_CUSTOM_TARGET(check
  COMMAND ./check/check.sh ./check/list.txt)
 ## Compiler flags
 IF (CMAKE_COMPILER_IS_GNUCXX)
  # CMAKE_CXX_FLAGS is a single space-separated string, not a list:
  # list(APPEND) would join with ';' and corrupt the command line whenever
  # flags are already set, so concatenate as a string instead.
  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++11 -O3 -DNDEBUG -Wall -Wextra")
  # Debug alternative:
  #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wall -Wextra")
 ENDIF()
 SET(WNB_SRCS wnb/core/wordnet.cc
  wnb/core/load_wordnet.cc wnb/core/info_helper.cc)
 # Executable
 #--------------------------------------------------
 ADD_EXECUTABLE (wntest wnb/main.cc ${WNB_SRCS})
 SET(EXECUTABLE_OUTPUT_PATH ${wnb_BINARY_DIR}/bin)
 # Static library
 #--------------------------------------------------
 ADD_LIBRARY(wnb ${WNB_SRCS})
 SET(LIBRARY_OUTPUT_PATH ${wnb_BINARY_DIR}/lib)
 IF (Boost_FOUND)
  TARGET_LINK_LIBRARIES(wntest ${Boost_LIBRARIES})
  TARGET_LINK_LIBRARIES(wnb ${Boost_LIBRARIES})
 ENDIF()
--- a/contrib/wordnet-blast/README
+++ b/contrib/wordnet-blast/README
@ -0,0 +1,43 @@
 =====================================================================
          WordNet Blast
 =====================================================================
 In-memory access to the WordNet ontology.
 DEPENDENCIES:
        boost 1.46
        wordnet-sense-index
        colordiff (for wntest)
 INSTALL:
        cmake CMakeLists.txt
        make
 TESTS: (Beta)
        make check
 USAGE:
        #include "wordnet.hh"
        #include "wnb/nltk_similarity.hh"
        using namespace std;
        using namespace wnb;
        int main()
        {
            wordnet wn(PATH_TO_WORDNET);
            vector<synset> synsets1 = wn.get_synsets("cat");
            vector<synset> synsets2 = wn.get_synsets("dog");
            nltk_similarity similarity(wn);
            float d = similarity(synsets1[0], synsets2[0], 6);
        }
 BUGS:
        - Word Morphing is sometimes incorrect.
 REFERENCE:
        George A. Miller (1995). WordNet: A Lexical Database for English.
        Communications of the ACM Vol. 38, No. 11: 39-41.
--- a/contrib/wordnet-blast/WORDNET_LICENSE
+++ b/contrib/wordnet-blast/WORDNET_LICENSE
@ -0,0 +1,25 @@
 This license is available as the file LICENSE in any downloaded version of
 WordNet.
 WordNet Release 3.0
 This software and database is being provided to you, the LICENSEE, by Princeton
 University under the following license.  By obtaining, using and/or copying this
 software and database, you agree that you have read, understood, and will comply
 with these terms and conditions.: Permission to use, copy, modify and distribute
 this software and database and its documentation for any purpose and without fee
 or royalty is hereby granted, provided that you agree to comply with the
 following copyright notice and statements, including the disclaimer, and that
 the same appear on ALL copies of the software, database and documentation,
 including modifications that you make for internal use or for distribution.
 WordNet 3.0 Copyright 2006 by Princeton University.  All rights reserved.  THIS
 SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON UNIVERSITY MAKES NO
 REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT
 LIMITATION, PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF
 MERCHANT- ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE
 LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY
 PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. The name of Princeton
 University or Princeton may not be used in advertising or publicity pertaining
 to distribution of the software and/or database.  Title to copyright in this
 software, database and any associated documentation shall at all times remain
 with Princeton University and LICENSEE agrees to preserve same.
--- a/contrib/wordnet-blast/changelog
+++ b/contrib/wordnet-blast/changelog
@ -0,0 +1,11 @@
 * 0.6
 	- Improve tests
 	- get_synsets by pos
 	- Load wordnet a bit faster
 	- Fix build on Mac Os (thanks to Roman Kutlak)
 	- Update doc
 	- Improve testing
 * 0.5
 	- get_synsets
 	 with morphing partially implemented (thanks to Yaron Feigin)
 	- sense similarity
--- a/contrib/wordnet-blast/check/biglist.txt
+++ b/contrib/wordnet-blast/check/biglist.txt
@ -0,0 +1,852 @@
 a
 able
 about
 account
 acid
 across
 act
 addition
 adjustment
 advertisement
 after
 again
 against
 agreement
 air
 all
 almost
 among
 amount
 amusement
 and
 angle
 angry
 animal
 answer
 ant
 any
 apparatus
 apple
 approval
 arch
 argument
 arm
 army
 art
 as
 at
 attack
 attempt
 attention
 attraction
 authority
 automatic
 awake
 baby
 back
 bad
 bag
 balance
 ball
 band
 base
 basin
 basket
 bath
 be
 beautiful
 because
 bed
 bee
 before
 behaviour
 belief
 bell
 bent
 berry
 between
 bird
 birth
 bit
 bite
 bitter
 black
 blade
 blood
 blow
 blue
 board
 boat
 body
 boiling
 bone
 book
 boot
 bottle
 box
 boy
 brain
 brake
 branch
 brass
 bread
 breath
 brick
 bridge
 bright
 broken
 brother
 brown
 brush
 bucket
 building
 bulb
 burn
 burst
 business
 but
 butter
 button
 by
 cake
 camera
 canvas
 card
 care
 carriage
 cart
 cat
 cause
 certain
 chain
 chalk
 chance
 change
 cheap
 cheese
 chemical
 chest
 chief
 chin
 church
 circle
 clean
 clear
 clock
 cloth
 cloud
 coal
 coat
 cold
 collar
 colour
 comb
 come
 comfort
 committee
 common
 company
 comparison
 competition
 complete
 complex
 condition
 connection
 conscious
 control
 cook
 copper
 copy
 cord
 cork
 cotton
 cough
 country
 cover
 cow
 crack
 credit
 crime
 cruel
 crush
 cry
 cup
 cup
 current
 curtain
 curve
 cushion
 damage
 danger
 dark
 daughter
 day
 dead
 dear
 death
 debt
 decision
 deep
 degree
 delicate
 dependent
 design
 desire
 destruction
 detail
 development
 different
 digestion
 direction
 dirty
 discovery
 discussion
 disease
 disgust
 distance
 distribution
 division
 do
 dog
 door
 doubt
 down
 drain
 drawer
 dress
 drink
 driving
 drop
 dry
 dust
 ear
 early
 earth
 east
 edge
 education
 effect
 egg
 elastic
 electric
 end
 engine
 enough
 equal
 error
 even
 event
 ever
 every
 example
 exchange
 existence
 expansion
 experience
 expert
 eye
 face
 fact
 fall
 false
 family
 far
 farm
 fat
 father
 fear
 feather
 feeble
 feeling
 female
 fertile
 fiction
 field
 fight
 finger
 fire
 first
 fish
 fixed
 flag
 flame
 flat
 flight
 floor
 flower
 fly
 fold
 food
 foolish
 foot
 for
 force
 fork
 form
 forward
 fowl
 frame
 free
 frequent
 friend
 from
 front
 fruit
 full
 future
 garden
 general
 get
 girl
 give
 glass
 glove
 go
 goat
 gold
 good
 government
 grain
 grass
 great
 green
 grey
 grip
 group
 growth
 guide
 gun
 hair
 hammer
 hand
 hanging
 happy
 harbour
 hard
 harmony
 hat
 hate
 have
 he
 head
 healthy
 hear
 hearing
 heart
 heat
 help
 high
 history
 hole
 hollow
 hook
 hope
 horn
 horse
 hospital
 hour
 house
 how
 humour
 I
 ice
 idea
 if
 ill
 important
 impulse
 in
 increase
 industry
 ink
 insect
 instrument
 insurance
 interest
 invention
 iron
 island
 jelly
 jewel
 join
 journey
 judge
 jump
 keep
 kettle
 key
 kick
 kind
 kiss
 knee
 knife
 knot
 knowledge
 land
 language
 last
 late
 laugh
 law
 lead
 leaf
 learning
 leather
 left
 leg
 let
 letter
 level
 library
 lift
 light
 like
 limit
 line
 linen
 lip
 liquid
 list
 little
 living
 lock
 long
 look
 loose
 loss
 loud
 love
 low
 machine
 make
 male
 man
 manager
 map
 mark
 market
 married
 mass
 match
 material
 may
 meal
 measure
 meat
 medical
 meeting
 memory
 metal
 middle
 military
 milk
 mind
 mine
 minute
 mist
 mixed
 money
 monkey
 month
 moon
 morning
 mother
 motion
 mountain
 mouth
 move
 much
 muscle
 music
 nail
 name
 narrow
 nation
 natural
 near
 necessary
 neck
 need
 needle
 nerve
 net
 new
 news
 night
 no
 noise
 normal
 north
 nose
 not
 note
 now
 number
 nut
 observation
 of
 off
 offer
 office
 oil
 old
 on
 only
 open
 operation
 opinion
 opposite
 or
 orange
 order
 organization
 ornament
 other
 out
 oven
 over
 owner
 page
 pain
 paint
 paper
 parallel
 parcel
 part
 past
 paste
 payment
 peace
 pen
 pencil
 person
 physical
 picture
 pig
 pin
 pipe
 place
 plane
 plant
 plate
 play
 please
 pleasure
 plough
 pocket
 point
 poison
 polish
 political
 poor
 porter
 position
 possible
 pot
 potato
 powder
 power
 present
 price
 print
 prison
 private
 probable
 process
 produce
 profit
 property
 prose
 protest
 public
 pull
 pump
 punishment
 purpose
 push
 put
 quality
 question
 quick
 quiet
 quite
 rail
 rain
 range
 rat
 rate
 ray
 reaction
 reading
 ready
 reason
 receipt
 record
 red
 regret
 regular
 relation
 religion
 representative
 request
 respect
 responsible
 rest
 reward
 rhythm
 rice
 right
 ring
 river
 road
 rod
 roll
 roof
 room
 root
 rough
 round
 rub
 rule
 run
 sad
 safe
 sail
 salt
 same
 sand
 say
 scale
 school
 science
 scissors
 screw
 sea
 seat
 second
 secret
 secretary
 see
 seed
 seem
 selection
 self
 send
 sense
 separate
 serious
 servant
 sex
 shade
 shake
 shame
 sharp
 sheep
 shelf
 ship
 shirt
 shock
 shoe
 short
 shut
 side
 sign
 silk
 silver
 simple
 sister
 size
 skin
 skirt
 sky
 sleep
 slip
 slope
 slow
 small
 smash
 smell
 smile
 smoke
 smooth
 snake
 sneeze
 snow
 so
 soap
 society
 sock
 soft
 solid
 some
 son
 song
 sort
 sound
 soup
 south
 space
 spade
 special
 sponge
 spoon
 spring
 square
 stage
 stamp
 star
 start
 statement
 station
 steam
 steel
 stem
 step
 stick
 sticky
 stiff
 still
 stitch
 stocking
 stomach
 stone
 stop
 store
 story
 straight
 strange
 street
 stretch
 strong
 structure
 substance
 such
 sudden
 sugar
 suggestion
 summer
 sun
 support
 surprise
 sweet
 swim
 system
 table
 tail
 take
 talk
 tall
 taste
 tax
 teaching
 tendency
 test
 than
 that
 the
 then
 theory
 there
 thick
 thin
 thing
 this
 thought
 thread
 throat
 through
 through
 thumb
 thunder
 ticket
 tight
 till
 time
 tin
 tired
 to
 toe
 together
 tomorrow
 tongue
 tooth
 top
 touch
 town
 trade
 train
 transport
 tray
 tree
 trick
 trouble
 trousers
 true
 turn
 twist
 umbrella
 under
 unit
 up
 use
 value
 verse
 very
 vessel
 view
 violent
 voice
 waiting
 walk
 wall
 war
 warm
 wash
 waste
 watch
 water
 wave
 wax
 way
 weather
 week
 weight
 well
 west
 wet
 wheel
 when
 where
 while
 whip
 whistle
 white
 who
 why
 wide
 will
 wind
 window
 wine
 wing
 winter
 wire
 wise
 with
 woman
 wood
 wool
 word
 work
 worm
 wound
 writing
 wrong
 year
 yellow
 yes
 yesterday
 you
 young
--- a/contrib/wordnet-blast/check/check.sh
+++ b/contrib/wordnet-blast/check/check.sh
@ -0,0 +1,16 @@
 #!/bin/bash
 WNHOME=/usr/share/wordnet/
 # check WORD_LIST
 # Runs ./bin/wntest and the reference `wn -over` tool over every word in
 # WORD_LIST, stores their outputs in WORD_LIST.blast and WORD_LIST.wn, and
 # shows a side-by-side colordiff of the two. Each command is echoed before
 # it runs, and both runs are timed.
 check() {
    local word_list="$1"
    echo "./bin/wntest $WNHOME ${word_list}"
    time ./bin/wntest "$WNHOME" "${word_list}" > "${word_list}.blast"
    # \$i is escaped so the echoed command shows the loop variable literally
    # instead of expanding the (unset) $i to an empty string.
    echo "for i in \`cat ${word_list}\`; do wn \$i -over; done"
    # Word splitting of the file contents is intentional: one word per item.
    time for i in $(cat "${word_list}"); do wn "$i" -over; done > "${word_list}.wn"
    echo "diff ${word_list}.wn ${word_list}.blast -b"
    colordiff -y "${word_list}.wn" "${word_list}.blast" -b
 }
 check "$1"
--- a/contrib/wordnet-blast/check/list.txt
+++ b/contrib/wordnet-blast/check/list.txt
@ -0,0 +1,7 @@
 cat
 lions
 city
 building
 salvation
 medications
 haven
--- a/contrib/wordnet-blast/wnb/bfs.hh
+++ b/contrib/wordnet-blast/wnb/bfs.hh
@ -0,0 +1,72 @@
 #ifndef _BFS_HH
 # define _BFS_HH
 # include <boost/graph/breadth_first_search.hpp>
 # include <boost/graph/filtered_graph.hpp>
 namespace wnb
 {
  struct synset;
  namespace bfs // breadth first search tools
  {
    /// BFS visitor that records, for every tree edge, the hop count of the
    /// edge's target vertex (source's count + 1).
    /// The search is aborted by throwing: the recorded distance when the
    /// target synset is reached, or -1 once the distance exceeds the maximum.
    template <typename DistanceMap>
    class distance_recorder : public boost::default_bfs_visitor
    {
    public:
      /// \param dist  map written with the distance of each discovered vertex
      /// \param s     synset whose discovery ends the search
      /// \param max   largest distance explored before giving up
      distance_recorder(DistanceMap dist, const synset& s, int max)
        : distances_(dist)
        , goal_(s)
        , limit_(max)
      { }

      /// Tree-edge event: update the distance of the edge target, then check
      /// the two stop conditions (goal reached first, depth limit second).
      template <typename Edge, typename Graph>
      void tree_edge(Edge e, const Graph& g) const
      {
        typedef typename boost::graph_traits<Graph>::vertex_descriptor vertex_t;
        vertex_t from = boost::source(e, g);
        vertex_t to   = boost::target(e, g);

        distances_[to] = distances_[from] + 1;

        if (g[to] == goal_)
          throw distances_[to];
        if (distances_[to] > limit_)
          throw -1;
      }

    private:
      DistanceMap   distances_;
      const synset& goal_;
      int           limit_;
    };
    /// Factory helper: deduces DistanceMap from the argument so callers do
    /// not have to spell out the distance_recorder instantiation.
    template <typename DistanceMap>
    distance_recorder<DistanceMap>
    record_distance(DistanceMap d, const synset& s, int m)
    {
      typedef distance_recorder<DistanceMap> recorder_t;
      return recorder_t(d, s, m);
    }
    /// Edge predicate selecting which edges of the original graph appear in
    /// the filtered graph: only edges whose pointer symbol index is 1..4.
    //FIXME: Do we really need a map here (check cost of property_map construction
    // / should be light)
    template <typename PointerSymbolMap>
    struct hypo_hyper_edge
    {
      hypo_hyper_edge() { }

      hypo_hyper_edge(PointerSymbolMap pointer_symbol)
        : m_pointer_symbol(pointer_symbol)
      { }

      /// True when the edge's pointer symbol index lies in [1, 4]
      /// (see the pointer symbol list in info_helper.hh).
      template <typename Edge>
      bool operator()(const Edge& e) const
      {
        const int symbol = get(m_pointer_symbol, e);
        return symbol >= 1 && symbol <= 4;
      }

      PointerSymbolMap m_pointer_symbol;
    };
  } // end of wnb::bfs
 } // end of namespace wnb
 #endif /* _BFS_HH */
--- a/contrib/wordnet-blast/wnb/core/info_helper.cc
+++ b/contrib/wordnet-blast/wnb/core/info_helper.cc
@ -0,0 +1,148 @@
 #include "info_helper.hh"
 #include <iostream>
 #include <fstream>
 #include <sstream>
 #include <map>
 #include <cassert>
 namespace wnb
 {
  // Class info_helper
  /// List of pointer symbols.
  /// The position of a symbol in this table is the integer code returned by
  /// info_helper::get_symbol(); the number in each trailing comment is that
  /// position.
  const char *
  info_helper::symbols[info_helper::NB_SYMBOLS] = {
    "!" ,  // 0 Antonym
    "@" ,  // 1 Hypernym
    "@i",  // 2 Instance Hypernym
    "~" ,  // 3 Hyponym
    "~i",  // 4 Instance Hyponym
    "#m",  // 5 Member holonym
    "#s",  // 6 Substance holonym
    "#p",  // 7 Part holonym
    "%m",  // 8 Member meronym
    "%s",  // 9 Substance meronym
    "%p",  // 10 Part meronym
    "=" ,  // 11 Attribute
    "+" ,  // 12 Derivationally related form
    ";c",  // 13 Domain of synset - TOPIC
    "-c",  // 14 Member of this domain - TOPIC
    ";r",  // 15 Domain of synset - REGION
    "-r",  // 16 Member of this domain - REGION
    ";u",  // 17 Domain of synset - USAGE
    "-u",  // 18 Member of this domain - USAGE
    //The pointer_symbol s for verbs are:
    "*",   // 19 Entailment
    ">",   // 20 Cause
    "^",   // 21 Also see
    "$",   // 22 Verb Group
    //The pointer_symbol s for adjectives are:
    "&",   // 23 Similar to
    "<",   // 24 Participle of verb
    "\\",  // 25 Pertainym (pertains to noun)
    // NOTE(review): "=" already appears at index 11; get_symbol() returns the
    // first match, so index 26 is never produced by a lookup.
    "=",   // 26 Attribute
  };
  // Suffix table, grouped by part of speech: 8 noun, 8 verb and 4 adjective
  // entries. Entry i lines up with addr[i] below.
  // NOTE(review): presumably sufx[i] is the suffix stripped from a word and
  // addr[i] the ending put in its place; the code using these tables is not
  // in this file - confirm against the caller.
  const std::string info_helper::sufx[] = {
    /* Noun suffixes */
    "s", "ses", "xes", "zes", "ches", "shes", "men", "ies",
    /* Verb suffixes */
    "s", "ies", "es", "es", "ed", "ed", "ing", "ing",
    /* Adjective suffixes */
    "er", "est", "er", "est"
  };
  // Ending table parallel to sufx (same grouping and same length).
  const std::string info_helper::addr[] = {
    /* Noun endings */
    "", "s", "x", "z", "ch", "sh", "man", "y",
    /* Verb endings */
    "", "y", "e", "", "e", "", "e", "",
    /* Adjective endings */
    "", "", "e", "e"
  };
  // Both arrays have NUMPARTS (== POS_ARRAY_SIZE) entries, one per pos_t value
  // (UNKNOWN, N, V, A, R, S). The non-zero values match the grouping of
  // sufx/addr: nouns start at 0 with 8 entries, verbs at 8 with 8 entries,
  // adjectives at 16 with 4 entries.
  const int info_helper::offsets[info_helper::NUMPARTS] = { 0, 0, 8, 16, 0, 0 };
  const int info_helper::cnts[info_helper::NUMPARTS]    = { 0, 8, 8, 4, 0, 0 };
  void
  info_helper::update_pos_maps()
  {
    // http://wordnet.princeton.edu/wordnet/man/wndb.5WN.html#sect3
    indice_offset[UNKNOWN] = 0;
    indice_offset[N] = 0;
    indice_offset[V] = indice_offset[N] + pos_maps[N].size();
    indice_offset[A] = indice_offset[V] + pos_maps[V].size();
    indice_offset[R] = indice_offset[A] + pos_maps[A].size();
    indice_offset[S] = indice_offset[R] + pos_maps[R].size();
  }
  /// Translate a (synset offset, part of speech) pair into the global vertex
  /// index: the start of the pos range plus the row index stored for
  /// \p offset in that pos's map.
  ///
  /// \param offset  synset offset as found in the data/index files
  /// \param pos     part of speech; S is looked up in the A map
  /// \return        indice_offset[pos] + pos_maps[pos][offset]
  int info_helper::compute_indice(int offset, pos_t pos)
  {
    // Satellite adjectives have no map of their own; they use the A map.
    if (pos == S)
      pos = A;
    // Validate before pos is used as a key/index: checking afterwards (as the
    // code did previously) lets an invalid pos create a pos_maps entry first.
    assert(pos <= 5 && pos > 0);
    std::map<int,int>& map = pos_maps[pos];
    // NOTE: operator[] inserts a 0 entry for an unknown offset.
    return indice_offset[pos] + map[offset];
  }
  // Function definitions
  /// Build the "synset offset -> row index" table for one data file.
  /// The first 29 lines are treated as header and skipped; every following
  /// line contributes its leading integer as key and its 0-based position
  /// among the data lines as value (the first occurrence of a key wins).
  /// \throws std::runtime_error when \p fn cannot be opened
  static
  std::map<int,int>
  preprocess_data(const std::string& fn)
  {
    std::ifstream input(fn.c_str());
    if (!input.is_open())
      throw std::runtime_error("preprocess_data: File not found: " + fn);

    std::string line;

    //skip header
    const unsigned int header_nb_lines = 29;
    unsigned int skipped = 0;
    while (skipped < header_nb_lines)
    {
      std::getline(input, line);
      ++skipped;
    }

    //parse data lines
    std::map<int,int> offset_to_index;
    for (int index = 0; std::getline(input, line); ++index)
    {
      std::stringstream fields(line);
      int offset;
      fields >> offset;
      offset_to_index.insert(std::make_pair(offset, index));
    }

    input.close();
    return offset_to_index;
  }
  /// Build an info_helper from the four data files found under \p dn
  /// (\p dn is used as a plain prefix of the file names), then refresh the
  /// per-pos index offsets.
  info_helper
  preprocess_wordnet(const std::string& dn)
  {
    static const pos_t       kinds[] = { N, V, A, R };
    static const char* const files[] = { "data.noun", "data.verb",
                                         "data.adj",  "data.adv" };
    const std::size_t nb_files = sizeof(kinds) / sizeof(kinds[0]);

    info_helper info;
    for (std::size_t k = 0; k < nb_files; ++k)
      info.pos_maps[kinds[k]] = preprocess_data(dn + files[k]);

    info.update_pos_maps();
    return info;
  }
 } // end of namespace wnb
--- a/contrib/wordnet-blast/wnb/core/info_helper.hh
+++ b/contrib/wordnet-blast/wnb/core/info_helper.hh
@ -0,0 +1,85 @@
 #pragma once
 # include <string>
 # include <stdexcept>
 # include <map>
 # include "pos_t.hh"
 namespace wnb
 {
  /// Useful information for wordnet in-memory import
  struct info_helper
  {
    /// Symbols' size
    static const std::size_t NB_SYMBOLS = 27;
    static const std::size_t NUMPARTS = POS_ARRAY_SIZE;
    /// List of pointer symbols
    static const char *      symbols[NB_SYMBOLS];
    static const std::string sufx[];
    static const std::string addr[];
    static const int  offsets[NUMPARTS];
    static const int  cnts[NUMPARTS];
    typedef std::map<int,int>       i2of_t;     ///< indice/offset correspondences
    typedef std::map<pos_t, i2of_t> pos_i2of_t; ///< pos / map  correspondences
    /// Constructor
    info_helper() { update_pos_maps(); }
    /// Compute the number of synsets (i.e. the number of vertex in the graph)
    unsigned nb_synsets()
    {
      typedef pos_i2of_t::iterator iter_t;
      int sum = 0;
      for (iter_t it = pos_maps.begin(); it != pos_maps.end(); it++)
        sum += (*it).second.size();
      return sum;
      //return adj_map.size() + adv_map.size() + noun_map.size() + verb_map.size();
    }
    // Given a pos return the starting indice in the graph
    int get_indice_offset(pos_t pos)
    {
      return indice_offset[pos];
    }
    /// Helper function computing global indice in graph from local offset
    int compute_indice(int offset, pos_t pos);
    /// Update a map allowing one to get the correct map given a pos
    void update_pos_maps();
    int get_symbol(const std::string& ps)
    {
      for (unsigned i = 0; i < NB_SYMBOLS; i++)
        if (ps == symbols[i])
          return i;
      throw std::runtime_error("Symbol NOT FOUND.");
    }
    pos_t get_pos(const char& c)
    {
      return get_pos_from_char(c);
    }
  public:
    // i2of_t adj_map;
    // i2of_t adv_map;
    // i2of_t noun_map;
    // i2of_t verb_map;
    pos_i2of_t  pos_maps;
    std::size_t indice_offset[POS_ARRAY_SIZE];
  };
  /// Create a new info_help based on wordnet data located in dn (../dict/)
  info_helper preprocess_wordnet(const std::string& dn);
 } // end of namespace wncpp
--- a/contrib/wordnet-blast/wnb/core/load_wordnet.cc
+++ b/contrib/wordnet-blast/wnb/core/load_wordnet.cc
@ -0,0 +1,381 @@
 #include "load_wordnet.hh"
 #include <fstream>
 #include <iostream>
 #include <sstream>
 #include <stdexcept>
 #include <algorithm>
 #include <utility>
 #include <boost/graph/adjacency_list.hpp>
 #include <boost/progress.hpp>
 #include <boost/algorithm/string.hpp>
 #include <wnb/std_ext.hh>
 #include "wordnet.hh"
 #include "info_helper.hh"
 #include "pos_t.hh"
 namespace bg = boost::graph;
 namespace wnb
 {
  namespace
  {
    // Read the word section of a data row: a hexadecimal word count followed
    // by that many "word lex_id" pairs (lex_id is hexadecimal too). The
    // values are appended to synset.words / synset.lex_ids.
    void load_data_row_words(std::stringstream& srow, synset& synset)
    {
      srow >> std::hex >> synset.w_cnt >> std::dec;

      for (std::size_t n = 0; n < synset.w_cnt; ++n)
      {
        std::string word;
        int         lex_id;

        srow >> word;
        srow >> std::hex >> lex_id >> std::dec;

        synset.words.push_back(word);
        synset.lex_ids.push_back(lex_id);
      }
    }
    // Add rel to graph
    void add_wordnet_rel(std::string& pointer_symbol_,// type of relation
                         int synset_offset,           // dest offset
                         pos_t pos,                   // p.o.s. of dest
                         int src,                     // word src
                         int trgt,                    // word target
                         synset& synset,              // source synset
                         wordnet& wn,                 // our wordnet
                         info_helper& info)           // helper
    {
      //if (pos == S || synset.pos == S)
      //  return; //FIXME: check where are s synsets.
      int u = synset.id;
      int v = info.compute_indice(synset_offset, pos);
      ptr p;
      p.pointer_symbol = info.get_symbol(pointer_symbol_);
      p.source = src;
      p.target = trgt;
      boost::add_edge(u,v, p, wn.wordnet_graph);
    }
    // load ptrs
    void load_data_row_ptrs(std::stringstream& srow, synset& synset,
                            wordnet& wn, info_helper& info)
    {
      srow >> synset.p_cnt;
      for (std::size_t i = 0; i < synset.p_cnt; i++)
      {
        //http://wordnet.princeton.edu/wordnet/man/wndb.5WN.html#sect3
        //pointer_symbol  synset_offset  pos  source/target
        std::string pointer_symbol_;
        int   synset_offset;
        pos_t pos;
        int   src;
        int   trgt;
        srow >> pointer_symbol_;
        srow >> synset_offset;
        char c;
        srow >> c;
        pos = info.get_pos(c);
        //print extracted edges
        //std::cout << "(" << pointer_symbol << ", " << synset_offset;
        //std::cout << ", " << pos << ")" << std::endl;
        // Extract source/target words info
        std::string src_trgt;
        srow >> src_trgt;
        std::stringstream ssrc(std::string(src_trgt,0,2));
        std::stringstream strgt(std::string(src_trgt,2,2));
        ssrc >> std::hex >> src >> std::dec;
        strgt >> std::hex >> trgt >> std::dec;
        add_wordnet_rel(pointer_symbol_, synset_offset, pos, src, trgt, synset, wn, info);
      }
    }
    // Load a synset and add it to the wordnet class.
    void load_data_row(const std::string& row, wordnet& wn, info_helper& info)
    {
      //http://wordnet.princeton.edu/wordnet/man/wndb.5WN.html#sect3
      // synset_offset lex_filenum ss_type w_cnt word lex_id [word lex_id...] p_cnt [ptr...] [frames...] | gloss
      synset synset;
      std::stringstream srow(row);
      int synset_offset;
      srow >> synset_offset;
      srow >> synset.lex_filenum;
      char ss_type;
      srow >> ss_type;
      // extra information
      synset.pos = info.get_pos(ss_type);
      synset.id  = info.compute_indice(synset_offset, synset.pos);
      // words
      load_data_row_words(srow, synset);
      // ptrs
      load_data_row_ptrs(srow, synset, wn, info);
      //frames (skipped)
      std::string tmp;
      while (srow >> tmp)
        if (tmp == "|")
          break;
      // gloss
      std::getline(srow, synset.gloss);
      // extra
      synset.sense_number = 0;
      // Add synset to graph
      wn.wordnet_graph[synset.id] = synset;
    }
    // Parse one data file (e.g. data.noun): skip the 29 header lines, then
    // hand every remaining line to load_data_row().
    // Throws std::runtime_error when the file cannot be opened.
    void load_wordnet_data(const std::string& fn, wordnet& wn, info_helper& info)
    {
      std::ifstream fin(fn.c_str());
      if (!fin.is_open())
        throw std::runtime_error("File missing: " + fn);

      // Lines are read into a std::string instead of a fixed 20 KB char
      // buffer: with the buffer, a longer line set failbit and silently
      // ended the parse, dropping the rest of the file.
      std::string row;

      //skip header
      const unsigned int header_nb_lines = 29;
      for (unsigned int i = 0; i < header_nb_lines; i++)
        std::getline(fin, row);

      //parse data line
      while (std::getline(fin, row))
        load_data_row(row, wn, info);

      fin.close();
    }
    //FIXME: It seems possible to replace synset_offsets with indice here.
    void load_index_row(const std::string& row, wordnet& wn, info_helper& info)
    {
      // lemma pos synset_cnt p_cnt [ptr_symbol...] sense_cnt tagsense_cnt synset_offset [synset_offset...]
      index index;
      std::stringstream srow(row);
      char pos;
      srow >> index.lemma;
      srow >> pos;
      index.pos = info.get_pos(pos); // extra data
      srow >> index.synset_cnt;
      srow >> index.p_cnt;
      std::string tmp_p;
      for (std::size_t i = 0; i < index.p_cnt; i++)
      {
        srow >> tmp_p;
        index.ptr_symbols.push_back(tmp_p);
      }
      srow >> index.sense_cnt;
      srow >> index.tagsense_cnt;
      int tmp_o;
      while (srow >> tmp_o)
      {
        index.synset_offsets.push_back(tmp_o);
        index.synset_ids.push_back(info.compute_indice(tmp_o, index.pos)); // extra data
      }
      //add synset to index list
      wn.index_list.push_back(index);
    }
    // Parse one index file (e.g. index.noun): skip the 29 header lines, then
    // hand every remaining line to load_index_row().
    // Throws std::runtime_error when the file cannot be opened.
    void load_wordnet_index(const std::string& fn, wordnet& wn, info_helper& info)
    {
      std::ifstream fin(fn.c_str());
      if (!fin.is_open())
        throw std::runtime_error("File Not Found: " + fn);

      // Lines are read into a std::string instead of a fixed 20 KB char
      // buffer: with the buffer, a longer line set failbit and silently
      // ended the parse, dropping the rest of the file.
      std::string row;

      //skip header
      const unsigned int header_nb_lines = 29;
      for (std::size_t i = 0; i < header_nb_lines; i++)
        std::getline(fin, row);

      //parse data line
      while (std::getline(fin, row))
        load_index_row(row, wn, info);

      fin.close();
    }
    // Load the exception list "<dn><cat>.exc" into wn.exc[pos of cat].
    // The first two whitespace-separated tokens of each row are stored as
    // key -> value; a later row with the same key overwrites an earlier one.
    // Throws std::runtime_error when the file cannot be opened.
    void load_wordnet_exc(const std::string& dn, std::string cat,
                          wordnet& wn, info_helper&)
    {
      std::string fn = dn + cat + ".exc";
      std::ifstream fin(fn.c_str());
      if (!fin.is_open())
        throw std::runtime_error("File Not Found: " + fn);

      std::map<std::string,std::string>& exc = wn.exc[get_pos_from_name(cat)];

      std::string row;
      while (std::getline(fin, row))
      {
        std::stringstream srow(row);
        std::string key, value;
        // Only store complete rows. Previously key/value lived outside the
        // loop, so a row missing a token re-used the previous row's value
        // (or inserted an empty key for a leading blank line).
        if (srow >> key >> value)
          exc[key] = value;
      }
    }
    // Load everything belonging to one category ("adj", "noun", ...):
    // data.<cat>, then index.<cat>, then <cat>.exc, all prefixed with dn.
    void load_wordnet_cat(const std::string dn, std::string cat,
                          wordnet& wn, info_helper& info)
    {
      const std::string data_file  = dn + "data." + cat;
      const std::string index_file = dn + "index." + cat;

      load_wordnet_data(data_file, wn, info);
      load_wordnet_index(index_file, wn, info);
      load_wordnet_exc(dn, cat, wn, info);
    }
    // Read "<dn>index.sense" and, for every row, add the row's sense number
    // to the matching synset and record its (word, tag count) pair when the
    // tag count is non-zero.
    // Row layout as consumed here: sense_key synset_offset sense_number tag_cnt
    // Throws std::runtime_error when the file cannot be opened; the .at()
    // calls on the std::vector throw std::out_of_range when a sense key
    // lacks the expected parts.
    // FIXME: this file is not in all packaged version of wordnet
    void load_wordnet_index_sense(const std::string& dn, wordnet& wn, info_helper& info)
    {
      std::string fn = dn + "index.sense";
      std::ifstream fin(fn.c_str());
      if (!fin.is_open())
        throw std::runtime_error("File Not Found: " + fn);
      std::string row;
      std::string sense_key;
      int synset_offset;
      while (std::getline(fin, row))
      {
        std::stringstream srow(row);
        srow >> sense_key;
        // Get the pos of the lemma
        // NOTE(review): assumes ext::split returns the pieces of its first
        // argument separated by the given character - confirm in std_ext.hh.
        // Piece 0 of the '%' split is used as the word; the first ':' field
        // of piece 1 is parsed as an integer and cast to pos_t.
        std::vector<std::string> sk = ext::split(sense_key,'%');
        std::string word = sk.at(0);
        std::stringstream tmp(ext::split(sk.at(1), ':').at(0));
        int ss_type;
        tmp >> ss_type;
        pos_t pos =  (pos_t) ss_type;
        srow >> synset_offset;
        // Update synset info
        // u is the global vertex index of the synset this row refers to
        int u = info.compute_indice(synset_offset, pos);
        int sense_number;
        srow >> sense_number;
        // accumulated, not assigned: several rows may refer to the same synset
        wn.wordnet_graph[u].sense_number += sense_number;
        int tag_cnt;
        srow >> tag_cnt;
        if (tag_cnt != 0)
          wn.wordnet_graph[u].tag_cnts.push_back( make_pair(word,tag_cnt) );
        //if (synset_offset == 2121620)
        //  std::cout << u << " " << word << " " << synset_offset << " "
        //            <<  wn.wordnet_graph[u].tag_cnt << " "
        //            <<  wn.wordnet_graph[u].words[0] << std::endl;
      }
    }
    // Read "<dn>cntlist" and update sense numbers / tag counts per synset.
    // Row layout as consumed here: sense_key sense_number tag_cnt
    // Throws std::runtime_error when the file cannot be opened.
    //
    // NOTE(review): this function is broken as written - synset_offset is
    // read without ever being initialised (the row carries no offset), so the
    // vertex index u is indeterminate. load_wordnet() in this file does not
    // call it; do not start calling it before the lookup is fixed.
    //
    // wn -over used info in cntlist even if this is deprecated
    // It is ok not to FIX and use this function
    void load_wordnet_cntlist(const std::string& dn, wordnet& wn, info_helper& info)
    {
      std::string fn = dn + "cntlist";
      std::ifstream fin(fn.c_str());
      if (!fin.is_open())
        throw std::runtime_error("File Not Found: " + fn);
      std::string sense_key;
      int sense_number;
      int tag_cnt;
      std::string row;
      while (std::getline(fin, row))
      {
        std::stringstream srow(row);
        srow >> sense_key;
        srow >> sense_number;
        srow >> tag_cnt;
        // Get the pos of the lemma
        // NOTE(review): assumes ext::split returns the pieces of its first
        // argument separated by the given character - confirm in std_ext.hh.
        std::string word = ext::split(sense_key,'%').at(0);
        std::stringstream tmp(ext::split(ext::split(sense_key,'%').at(1), ':').at(0));
        int ss_type;
        tmp >> ss_type;
        pos_t pos = (pos_t) ss_type;
        // Update synset info
        int synset_offset; // FIXME: never assigned before the use below
        int u = info.compute_indice(synset_offset, pos);
        wn.wordnet_graph[u].sense_number += sense_number;
        if (tag_cnt != 0)
          wn.wordnet_graph[u].tag_cnts.push_back( make_pair(word,tag_cnt) );
      }
    }
  } // end of anonymous namespace
  /// Load the whole WordNet database found under \p dn into \p wn.
  /// The categories are loaded in the order adj, noun, adv, verb, followed
  /// by index.sense; finally wn.index_list is stable-sorted. When
  /// wn._verbose is set, a progress bar and a timer are shown on std::cout.
  /// Exceptions thrown by the individual loaders propagate to the caller.
  void load_wordnet(const std::string& dn, wordnet& wn, info_helper& info)
  {
    // vertex added in this order a n r v
    // (the unused local copy of dn that used to live here was removed)
    if (wn._verbose)
    {
      std::cout << std::endl;
      std::cout << "### Loading Wordnet 3.0";
      boost::progress_display show_progress(5);
      boost::progress_timer t;
      load_wordnet_cat(dn, "adj", wn, info);
      ++show_progress;
      load_wordnet_cat(dn, "noun", wn, info);
      ++show_progress;
      load_wordnet_cat(dn, "adv", wn, info);
      ++show_progress;
      load_wordnet_cat(dn, "verb", wn, info);
      ++show_progress;
      load_wordnet_index_sense(dn, wn, info);
      ++show_progress;
      std::cout << std::endl;
    }
    else
    {
      load_wordnet_cat(dn, "adj", wn, info);
      load_wordnet_cat(dn, "noun", wn, info);
      load_wordnet_cat(dn, "adv", wn, info);
      load_wordnet_cat(dn, "verb", wn, info);
      load_wordnet_index_sense(dn, wn, info);
    }
    std::stable_sort(wn.index_list.begin(), wn.index_list.end());
  }
 } // end of namespace wnb
--- a/contrib/wordnet-blast/wnb/core/load_wordnet.hh
+++ b/contrib/wordnet-blast/wnb/core/load_wordnet.hh
@ -0,0 +1,12 @@
 #pragma once
 # include "info_helper.hh"
 namespace wnb
 {
  /// Forward declaration: only a reference to wordnet is needed to declare
  /// load_wordnet().
  struct wordnet;
  /// Load the entire WordNet database located in directory \p dn
  /// (typically .../dict/) into \p wn.
  /// \param dn   directory containing the WordNet database files
  /// \param wn   wordnet object that receives the loaded data
  /// \param info helper object handed to the loader alongside \p wn
  void load_wordnet(const std::string& dn, wordnet& wn, info_helper& info);
 }
--- a/contrib/wordnet-blast/wnb/core/pos_t.hh
+++ b/contrib/wordnet-blast/wnb/core/pos_t.hh
@ -0,0 +1,61 @@
 #pragma once
 namespace wnb
 {
  /// Number of part-of-speech codes, the "unknown" slot included.
  static const std::size_t POS_ARRAY_SIZE = 6;
  /// One-letter code of every part of speech, indexed by its pos_t value.
  static const char POS_ARRAY[POS_ARRAY_SIZE] = {'u', 'n', 'v', 'a', 'r', 's'};
  /// Part of speech. The numeric values match the positions in POS_ARRAY.
  enum pos_t
  {
    UNKNOWN = 0,
    N       = 1,
    V       = 2,
    A       = 3,
    R       = 4,
    S       = 5,
  };
  /// Map a category name ("adj", "noun", "adv", "verb", "adj sat") to its
  /// pos_t value; any other name yields UNKNOWN.
  inline pos_t get_pos_from_name(const std::string& pos)
  {
    struct named_pos
    {
      const char* name;
      pos_t       value;
    };
    static const named_pos table[] = {
      {"adj", A}, {"noun", N}, {"adv", R}, {"verb", V}, {"adj sat", S}
    };
    for (std::size_t i = 0; i < sizeof(table) / sizeof(table[0]); ++i)
    {
      if (pos == table[i].name)
        return table[i].value;
    }
    return UNKNOWN;
  }
  /// Inverse of get_pos_from_name(); UNKNOWN and any out-of-range value
  /// give the string "UNKNOWN".
  inline std::string get_name_from_pos(const pos_t& pos)
  {
    if (pos == A)
      return "adj";
    if (pos == N)
      return "noun";
    if (pos == R)
      return "adv";
    if (pos == V)
      return "verb";
    if (pos == S)
      return "adj sat";
    return "UNKNOWN";
  }
  /// Map a one-letter code ('n', 'v', 'a', 'r', 's') to its pos_t value;
  /// every other character yields UNKNOWN.
  inline pos_t get_pos_from_char(const char& c)
  {
    // Slot 0 is the 'u' (unknown) code, so the scan starts at 1 and falls
    // back to UNKNOWN when nothing matches.
    for (std::size_t i = 1; i < POS_ARRAY_SIZE; ++i)
    {
      if (POS_ARRAY[i] == c)
        return static_cast<pos_t>(i);
    }
    return UNKNOWN;
  }
 } // end of namespace wnb
--- a/contrib/wordnet-blast/wnb/core/wordnet.cc
+++ b/contrib/wordnet-blast/wnb/core/wordnet.cc
@ -0,0 +1,186 @@
 #include <wnb/core/wordnet.hh>
 #include <wnb/std_ext.hh>
 #include <string>
 #include <set>
 #include <algorithm>
 #include <stdexcept>
 #include <boost/graph/breadth_first_search.hpp>
 #include <boost/graph/filtered_graph.hpp>
 namespace wnb
 {
  //FIXME: Make (smart) use of fs::path
  /// Build the WordNet interface from the database in \p wordnet_dir.
  ///
  /// \param wordnet_dir directory passed to preprocess_wordnet() and
  ///                    load_wordnet()
  /// \param verbose     when true, the directory and the number of synsets
  ///                    are written to std::cout
  wordnet::wordnet(const std::string& wordnet_dir, bool verbose)
    : _verbose(verbose)
  {
    if (_verbose)
    {
      std::cout << wordnet_dir << std::endl;
    }
    // The three steps below depend on each other: `info` supplies the synset
    // count used to size the graph, and the loader then fills that graph.
    info = preprocess_wordnet(wordnet_dir);
    wordnet_graph = graph(info.nb_synsets());
    load_wordnet(wordnet_dir, *this, info);
    if (_verbose)
    {
      std::cout << "nb_synsets: " << info.nb_synsets() << std::endl;
    }
    //FIXME: this check is only valid for Wordnet 3.0
    //assert(info.nb_synsets() == 142335);//117659);
    // Sanity check only; it is compiled out when NDEBUG is defined.
    assert(info.nb_synsets() > 0);
  }
  std::vector<synset>
  wordnet::get_synsets(const std::string& word, pos_t pos)
  {
    std::vector<synset> synsets;
    // morphing
    std::string mword = morphword(word, pos);
    if (mword == "")
      return synsets;
    // binary_search
    typedef std::vector<index> vi;
    std::pair<vi::iterator,vi::iterator> bounds = get_indexes(mword);
    vi::iterator it;
    for (it = bounds.first; it != bounds.second; it++)
    {
      if (pos == pos_t::UNKNOWN || it->pos == pos)
      {
        for (std::size_t i = 0; i < it->synset_ids.size(); i++)
        {
          int id = it->synset_ids[i];
          synsets.push_back(wordnet_graph[id]);
        }
      }
    }
    return synsets;
  }
  /// Return the word list of the first synset recorded for \p word.
  ///
  /// Unlike get_synsets(), \p word is looked up as given (morphword() is not
  /// applied). Index entries are scanned in index_list order; the first one
  /// whose pos matches (or any entry when \p pos is UNKNOWN) and that has at
  /// least one synset id wins.
  ///
  /// \return pointer to the `words` vector stored inside wordnet_graph (not
  ///         a copy), or nullptr when no entry matches
  const std::vector<std::string> *
  wordnet::get_synset(const std::string& word, pos_t pos) const {
    typedef std::vector<index> vi;
    std::pair<vi::const_iterator,vi::const_iterator> bounds = get_indexes_const(word);
    for (vi::const_iterator it = bounds.first; it != bounds.second; ++it)
    {
      if (pos != pos_t::UNKNOWN && it->pos != pos)
        continue;
      // An entry without synset ids has nothing to return; reading
      // synset_ids[0] on it would be out of bounds, so skip it.
      if (it->synset_ids.empty())
        continue;
      int id = it->synset_ids.front();
      return &wordnet_graph[id].words;
    }
    return nullptr;
  }
  std::pair<std::vector<index>::const_iterator, std::vector<index>::const_iterator>
  wordnet::get_indexes_const(const std::string& word) const
  {
    index light_index;
    light_index.lemma = word;
    typedef std::vector<index> vi;
    std::pair<vi::const_iterator,vi::const_iterator> bounds =
      std::equal_range(index_list.begin(), index_list.end(), light_index);
    return bounds;
  }
  std::pair<std::vector<index>::iterator, std::vector<index>::iterator>
  wordnet::get_indexes(const std::string& word)
  {
    index light_index;
    light_index.lemma = word;
    typedef std::vector<index> vi;
    std::pair<vi::iterator,vi::iterator> bounds =
      std::equal_range(index_list.begin(), index_list.end(), light_index);
    return bounds;
  }
  /// Apply detachment rule number \p ender to \p word.
  ///
  /// When \p word ends with info.sufx[ender], that suffix is removed and
  /// info.addr[ender] (if non-empty) is appended; otherwise \p word is
  /// returned unchanged.
  std::string
  wordnet::wordbase(const std::string& word, int ender)
  {
    if (!ext::ends_with(word, info.sufx[ender]))
      return word;
    const std::string::size_type kept = word.size() - info.sufx[ender].size();
    std::string base(word, 0, kept);
    if (!info.addr[ender].empty())
      base += info.addr[ender];
    return base;
  }
  /// Tell whether a morphed candidate should be accepted.
  /// Currently everything is accepted except the verb "builde".
  bool is_defined(const std::string& word, pos_t pos)
  {
    // hack FIXME: Some verbs are built with -e suffix ('builde' is just an example).
    const bool rejected = (pos == V && word == "builde");
    return !rejected;
  }
  // Try to find baseform (lemma) of individual word in POS
  std::string
  wordnet::morphword(const std::string& word, pos_t pos)
  {
    // first look for word on exception list
    exc_t::iterator it = exc[pos].find(word);
    if (it != exc[pos].end())
      return it->second; // found in exception list
    std::string tmpbuf;
    std::string end;
    int cnt = 0;
    if (pos == R)
      return ""; // Only use exception list for adverbs
    if (pos == N)
    {
      if (ext::ends_with(word, "ful"))
      {
        cnt = word.size() - 3;
        tmpbuf = word.substr(0, cnt);
        end = "ful";
      }
      else
      {
        // check for noun ending with 'ss' or short words
        if (ext::ends_with(word, "ss") || word.size() <= 2)
          return "";
      }
    }
    // If not in exception list, try applying rules from tables
    if (tmpbuf.size() == 0)
      tmpbuf = word;
    if (pos != pos_t::UNKNOWN) 
    {
      int offset  = info.offsets[pos];
      int pos_cnt = info.cnts[pos];
      std::string morphed;
      for  (int i = 0; i < pos_cnt; i++)
      {
        morphed = wordbase(tmpbuf, (i + offset));
        if (morphed != tmpbuf && is_defined(morphed, pos))
           return morphed + end;
      }
      return morphed;
    }
    return word;
  }
 } // end of namespace wnb
--- a/contrib/wordnet-blast/wnb/core/wordnet.hh
+++ b/contrib/wordnet-blast/wnb/core/wordnet.hh
@ -0,0 +1,113 @@
 #pragma once
 # include <iostream>
 # include <string>
 # include <cassert>
 # include <vector>
 //# include <boost/filesystem.hpp>
 //Possible https://bugs.launchpad.net/ubuntu/+source/boost/+bug/270873
 # include <boost/graph/graph_traits.hpp>
 # include <boost/graph/adjacency_list.hpp>
 # include "load_wordnet.hh"
 # include "pos_t.hh"
 namespace wnb
 {
  /// More info here: http://wordnet.princeton.edu/wordnet/man/wndb.5WN.html
  struct info_helper;
  /// Synset: vertex property of the wordnet graph.
  /// NOTE(review): the first six fields presumably mirror the data-file
  /// record described in wndb(5WN) - confirm against the loader.
  struct synset
  {
    int  lex_filenum;
    std::size_t  w_cnt;
    std::vector<std::string> words;   ///< words of the synset (returned by wordnet::get_synset)
    std::vector<int> lex_ids;
    std::size_t p_cnt;
    std::string gloss;                ///< gloss text (printed by print_synsets in main.cc)
    // extra
    pos_t pos;        ///< pos (replace ss_type)
    int id;           ///< unique identifier (replace synset_offset)
    int sense_number; ///< http://wordnet.princeton.edu/man/senseidx.5WN.html
    std::vector<std::pair<std::string, int> > tag_cnts; ///< http://wordnet.princeton.edu/man/senseidx.5WN.html
    /// Two synsets are equal when their ids are equal.
    bool operator==(const synset& s) const { return (id == s.id);  }
    /// Synsets are ordered by id.
    bool operator<(const synset& s) const { return (id < s.id);   }
  };
  /// Relation between synsets: edge property of the wordnet graph.
  struct ptr
  {
    //std::string pointer_symbol; ///< symbol of the relation
    /// Relation kind encoded as an integer; nltk_similarity treats the
    /// value 1 as "hypernym".
    int pointer_symbol;
    int source; ///< source word inside synset
    int target; ///< target word inside synset
  };
  /// Index entry: one lemma for one part of speech, with the synsets it
  /// belongs to.
  /// NOTE(review): the count fields presumably mirror the index-file record
  /// described in wndb(5WN) - confirm against the loader.
  struct index
  {
    std::string lemma;                       ///< lookup key (the only field used for ordering)
    std::size_t synset_cnt;
    std::size_t p_cnt;
    std::size_t sense_cnt;
    float       tagsense_cnt;
    std::vector<std::string> ptr_symbols;
    std::vector<int>         synset_offsets;
    // extra
    std::vector<int> synset_ids;             ///< vertex ids usable with wordnet::wordnet_graph
    pos_t pos;                               ///< part of speech of this entry
    /// Order entries by lemma only; entries that share a lemma (different
    /// pos) compare equivalent, which is what std::equal_range relies on.
    bool operator<(const index& b) const
    {
      return (lemma.compare(b.lemma) < 0);
    }
  };
  /// Wordnet interface class: owns the index list, the synset graph and the
  /// morphology data, and offers lookups on them.
  struct wordnet
  {
    typedef boost::adjacency_list<boost::vecS, boost::vecS,
                                  boost::directedS,
                                  synset, ptr> graph; ///< boost graph type
    /// Constructor: loads the database found in \p wordnet_dir; progress is
    /// written to std::cout when \p verbose is true.
    wordnet(const std::string& wordnet_dir, bool verbose=false);
    /// Return synsets matching word (the word is morphed first; pos ==
    /// UNKNOWN matches every part of speech)
    std::vector<synset> get_synsets(const std::string& word, pos_t pos = pos_t::UNKNOWN);
    //FIXME: todo
    // NOTE(review): declared only - no definition is visible in wordnet.cc.
    std::vector<synset> get_synset(const std::string& word, char pos, int i);
    // added
    /// Return a pointer to the word list of the first synset of \p word
    /// (no morphing applied), or nullptr when the word is not indexed.
    const std::vector<std::string> * get_synset(const std::string& word, pos_t pos = pos_t::UNKNOWN) const;
    /// Range of index_list entries whose lemma equals \p word.
    std::pair<std::vector<index>::iterator, std::vector<index>::iterator>
    get_indexes(const std::string& word);
    /// Const counterpart of get_indexes().
    std::pair<std::vector<index>::const_iterator, std::vector<index>::const_iterator>
    get_indexes_const(const std::string& word) const;
    /// Apply detachment rule \p ender (suffix replacement) to \p word.
    std::string wordbase(const std::string& word, int ender);
    /// Try to find the base form (lemma) of \p word for part of speech \p pos.
    std::string morphword(const std::string& word, pos_t pos);
    std::vector<index> index_list;    ///< index list // FIXME: use a map
    graph              wordnet_graph; ///< synsets graph
    info_helper        info;          ///< helper object
    bool               _verbose;      ///< print progress information to std::cout
    /// Exception list of one part of speech: inflected form -> base form.
    typedef std::map<std::string,std::string> exc_t;
    std::map<pos_t, exc_t> exc;       ///< exception lists, one per part of speech
  };
 } // end of namespace wnb
--- a/contrib/wordnet-blast/wnb/main.cc
+++ b/contrib/wordnet-blast/wnb/main.cc
@ -0,0 +1,180 @@
 #include <iostream>
 #include <fstream>
 #include <sstream>
 #include <boost/progress.hpp>
 #include <boost/algorithm/string.hpp>
 #include <wnb/core/wordnet.hh>
 #include <wnb/core/load_wordnet.hh>
 #include <wnb/core/info_helper.hh>
 #include <wnb/nltk_similarity.hh>
 #include <wnb/std_ext.hh>
 using namespace wnb;
 using namespace boost;
 using namespace boost::algorithm;
 /// Validate the command line and print a usage line when it is invalid.
 ///
 /// The program expects exactly two arguments: a WordNet directory that
 /// ends with '/' and a word-list file.
 ///
 /// \return true when the arguments are invalid (usage was printed to
 ///         std::cout), false when they are acceptable
 bool usage(int argc, char ** argv)
 {
  std::string dir;
  if (argc >= 2)
    dir = std::string(argv[1]);
  // Test for emptiness first: indexing the last character of an empty
  // string would read out of bounds.
  const bool dir_ok = !dir.empty() && dir[dir.length()-1] == '/';
  if (argc != 3 || !dir_ok)
  {
    std::cout << argv[0] << " .../wordnet_dir/ word_list_file" << std::endl;
    return true;
  }
  return false;
 }
 /// A word together with its similarity score.
 struct ws
 {
  std::string w; ///< the word
  float       s; ///< its score
  /// Reversed comparison: an element with a higher score sorts first.
  bool operator<(const ws& a) const
  {
    return a.s < s;
  }
 };
 /// Compute the similarity of \p word with every word of \p word_list.
 ///
 /// The words of all synsets of \p word are first printed to std::cout.
 /// For each list word the best path similarity over all synset pairs is
 /// kept; the score never goes below 0. A boost progress bar and timer are
 /// shown while the list is processed.
 ///
 /// \return one (word, best score) pair per entry of \p word_list, in the
 ///         same order
 std::vector<ws>
 compute_similarities(wordnet& wn,
                     const std::string& word,
                     const std::vector<std::string>& word_list)
 {
  std::vector<ws> scores;
  std::vector<synset> reference = wn.get_synsets(word);
  for (const synset& sense : reference)
    for (const std::string& member : sense.words)
      std::cout << " - " << member << std::endl;
  nltk_similarity path_similarity(wn);
  {
    progress_timer t;
    progress_display show_progress(word_list.size());
    for (const std::string& candidate : word_list)
    {
      float best = 0;
      std::vector<synset> others = wn.get_synsets(candidate);
      for (const synset& lhs : reference)
      {
        for (const synset& rhs : others)
        {
          const float score = path_similarity(lhs, rhs, 6);
          if (score > best)
            best = score;
        }
      }
      ws entry = {candidate, best};
      scores.push_back(entry);
      ++show_progress;
    }
  }
  return scores;
 }
 /// Print the (at most) ten words of \p word_list most similar to \p word,
 /// best first, one "word score" pair per line on std::cout.
 void similarity_test(wordnet&                  wn,
                     const std::string&        word,
                     std::vector<std::string>& word_list)
 {
  std::vector<ws> ranked = compute_similarities(wn, word, word_list);
  // ws::operator< is reversed, so this puts the highest scores first.
  std::stable_sort(ranked.begin(), ranked.end());
  const size_t shown = std::min(ranked.size(), size_t(10));
  for (size_t rank = 0; rank < shown; ++rank)
  {
    const ws& entry = ranked[rank];
    std::cout << entry.w << " " << entry.s << std::endl;
  }
 }
 /// Print an overview of index entry \p idx on std::cout: a header with
 /// the number of senses, then one numbered line per synset with its tag
 /// count for the lemma (if any), its words and its gloss.
 void print_synsets(pos_t pos, wnb::index& idx, wordnet& wn)
 {
  const std::string& lemma = idx.lemma;
  const std::size_t sense_count = idx.synset_ids.size();
  std::cout << "\nOverview of " << get_name_from_pos(pos) << " " << lemma << "\n\n";
  std::cout << "The " << get_name_from_pos(pos) << " " << lemma << " has "
            << sense_count << ((sense_count == 1) ? " sense": " senses");
  if (idx.tagsense_cnt != 0)
    std::cout << " (first " << idx.tagsense_cnt << " from tagged texts)";
  else
    std::cout << " (no senses from tagged texts)";
  std::cout << "\n";
  std::cout << "                                      \n";
  for (std::size_t i = 0; i < sense_count; i++)
  {
    const synset& sense = wn.wordnet_graph[idx.synset_ids[i]];
    std::cout << i+1 << ". ";
    // Tag count recorded for this exact lemma, if any.
    for (const std::pair<std::string, int>& tagged : sense.tag_cnts)
    {
      if (tagged.first == lemma)
        std::cout << "(" << tagged.second << ") ";
    }
    // For adjectives everything from the first '(' on is cut off each word.
    std::vector<std::string> shown;
    for (const std::string& w : sense.words)
    {
      if (pos == A)
        shown.push_back(w.substr(0, w.find_first_of("(")));
      else
        shown.push_back(w);
    }
    std::cout << replace_all_copy(join(shown, ", "), "_", " ");
    std::cout << " -- (" << trim_copy(sense.gloss) << ")";
    std::cout << std::endl;
  }
 }
 /// Print, in the style of the `wn` command line tool, every index entry of
 /// \p word whose part of speech equals \p pos. Nothing is printed for an
 /// empty word.
 void wn_like(wordnet& wn, const std::string& word, pos_t pos)
 {
  if (word.empty())
    return;
  typedef std::vector<wnb::index> vi;
  std::pair<vi::iterator,vi::iterator> bounds = wn.get_indexes(word);
  for (vi::iterator it = bounds.first; it != bounds.second; ++it)
  {
    // The former `pos != -1` guard was always true for a pos_t value and
    // has been dropped; only the part-of-speech match matters.
    if (it->pos == pos)
    {
      print_synsets(pos, *it, wn);
    }
  }
 }
 /// For every word of \p word_list and every known part of speech (codes
 /// 1 .. POS_ARRAY_SIZE-1), print the overview of the word itself and, when
 /// morphword() yields a different form, of that form as well.
 void batch_test(wordnet& wn, std::vector<std::string>& word_list)
 {
  for (const std::string& word : word_list)
  {
    for (unsigned code = 1; code < POS_ARRAY_SIZE; ++code)
    {
      const pos_t pos = static_cast<pos_t>(code);
      wn_like(wn, word, pos);
      const std::string base = wn.morphword(word, pos);
      if (base != word)
        wn_like(wn, base, pos);
    }
  }
 }
 int main(int argc, char ** argv)
 {
  if (usage(argc, argv))
    return 1;
  // read command line
  std::string wordnet_dir = argv[1];
  std::string test_file   = argv[2];
  wordnet wn(wordnet_dir);
  // read test file
  std::string list = ext::read_file(test_file);
  std::vector<std::string> wl        =  ext::split(list);
  batch_test(wn, wl);
 }
--- a/contrib/wordnet-blast/wnb/nltk_similarity.hh
+++ b/contrib/wordnet-blast/wnb/nltk_similarity.hh
@ -0,0 +1,146 @@
 #ifndef _NLTK_SIMILARITY_HH
 # define _NLTK_SIMILARITY_HH
 # include <queue>
 # include <boost/graph/filtered_graph.hpp>
 # include <wnb/core/wordnet.hh>
 namespace wnb
 {
  namespace internal
  {
    /// Edge predicate for boost::filtered_graph that keeps hypernym edges
    /// only, i.e. edges whose pointer symbol equals 1.
    template <typename PointerSymbolMap>
    struct hyper_edge
    {
      /// Default constructor (the property map is left default-initialized).
      hyper_edge() { }
      /// Build the predicate on top of the edge -> pointer symbol map.
      hyper_edge(PointerSymbolMap pointer_symbol)
        : m_pointer_symbol(pointer_symbol) { }
      /// \return true when edge \p e is a hypernym relation
      template <typename Edge>
      bool operator()(const Edge& e) const
      {
        // hypernyme (instance_hypernyme not used here)
        const int symbol = get(m_pointer_symbol, e);
        return symbol == 1;
      }
      PointerSymbolMap m_pointer_symbol; ///< edge -> pointer symbol map
    };
  } // end of namespace internal
  /// Path similarity between synsets, computed on the wordnet graph
  /// restricted to hypernym edges.
  class nltk_similarity
  {
    /// Property map giving the pointer symbol of each edge.
    typedef boost::property_map<wordnet::graph,
                                int ptr::*>::type PointerSymbolMap;
    /// View of the wordnet graph that only exposes hypernym edges.
    typedef boost::filtered_graph<wordnet::graph,
                                  internal::hyper_edge<PointerSymbolMap> > G;
    typedef boost::graph_traits<G>::vertex_descriptor vertex;
    internal::hyper_edge<PointerSymbolMap> filter; ///< edge predicate (declared before fg, which uses it)
    G fg;                                          ///< filtered graph built on wn.wordnet_graph
  public:
    /// Build the filtered view on top of \p wn's graph.
    nltk_similarity(wordnet& wn)
      : filter(get(&ptr::pointer_symbol, wn.wordnet_graph)),
                   fg(wn.wordnet_graph, filter)
    { }
    /// Get list of hypernyms of s along with distance to s
    std::map<vertex, int> hypernym_map(vertex s);
    /// Get shortest path between synset1 and synset2 through a common
    /// hypernym; -1 when they share none.
    int shortest_path_distance(const synset& synset1, const synset& synset2);
    /// Return the similarity 1 / (distance + 1), or -1 when no path exists.
    /// The third (int) argument is ignored.
    float operator()(const synset& synset1, const synset& synset2, int=0);
  };
  /// Collect every vertex reachable from \p s through hypernym edges,
  /// together with its distance (number of edges) from \p s.
  ///
  /// The result always contains \p s itself with distance 0.
  ///
  /// Defined `inline` because this definition lives in a header and would
  /// otherwise be emitted once per including translation unit.
  inline std::map<nltk_similarity::vertex, int>
  nltk_similarity::hypernym_map(nltk_similarity::vertex s)
  {
    std::map<vertex, int> distances;
    // Python:
    // for (hypernym in self[HYPERNYM])
    //   distances |= hypernym.hypernym_distances(distance+1);
    boost::graph_traits<G>::out_edge_iterator e, e_end;
    std::queue<vertex> pending;
    pending.push(s);
    distances[s] = 0;
    while (!pending.empty())
    {
      const vertex u = pending.front();
      pending.pop();
      const int new_d = distances[u] + 1;
      for (boost::tuples::tie(e, e_end) = out_edges(u, fg); e != e_end; ++e)
      {
        const vertex v = target(*e, fg);
        const std::map<vertex, int>::iterator known = distances.find(v);
        if (known == distances.end())
        {
          // First time v is reached: record it and explore it later.
          distances[v] = new_d;
          pending.push(v);
        }
        else if (new_d < known->second)
        {
          // Shorter route found: update and re-explore from v.
          known->second = new_d;
          pending.push(v);
        }
        // Otherwise v is already known with a distance at least as good.
        // It is deliberately NOT queued: popping the queue here would
        // remove the oldest pending vertex, not v.
      }
    }
    return distances;
  }
  /// Length of the shortest path joining \p synset1 and \p synset2 through
  /// a common hypernym (a synset counts as its own ancestor at distance 0).
  ///
  /// \return the smallest sum of the two distances to a shared ancestor, or
  ///         -1 when the synsets have no ancestor in common
  ///
  /// Defined `inline` because this definition lives in a header and would
  /// otherwise be emitted once per including translation unit.
  inline int
  nltk_similarity::shortest_path_distance(const synset& synset1, const synset& synset2)
  {
    // The synset id is used as the vertex descriptor of the graph.
    vertex v1 = synset1.id;
    vertex v2 = synset2.id;
    std::map<vertex, int> map1 = hypernym_map(v1);
    std::map<vertex, int> map2 = hypernym_map(v2);
    // For each ancestor synset common to both subject synsets, find the
    // connecting path length. Return the shortest of these.
    int path_distance = -1;
    std::map<vertex, int>::const_iterator it, it2;
    for (it = map1.begin(); it != map1.end(); ++it)
      for (it2 = map2.begin(); it2 != map2.end(); ++it2)
        if (fg[it->first] == fg[it2->first])
        {
          int new_distance = it->second + it2->second;
          if (path_distance < 0 || new_distance < path_distance)
            path_distance = new_distance;
        }
    return path_distance;
  }
  /// Path similarity of two synsets.
  ///
  /// \return 1 / (distance + 1) where distance is shortest_path_distance(),
  ///         or -1 when the synsets are not connected. The third argument
  ///         is ignored.
  ///
  /// Defined `inline` because this definition lives in a header and would
  /// otherwise be emitted once per including translation unit.
  inline float
  nltk_similarity::operator()(const synset& synset1, const synset& synset2, int)
  {
    const int distance = shortest_path_distance(synset1, synset2);
    if (distance < 0)
      return -1;
    // Computed in double as before, then narrowed explicitly to float.
    return static_cast<float>(1. / (distance + 1));
  }
 } // end of namespace wnb
 #endif /* _NLTK_SIMILARITY_HH */
--- a/contrib/wordnet-blast/wnb/std_ext.hh
+++ b/contrib/wordnet-blast/wnb/std_ext.hh
@ -0,0 +1,90 @@
 #ifndef _STD_EXT_HH
 # define _STD_EXT_HH
 # include <string>
 # include <sstream>
 # include <fstream>
 # include <algorithm>
 # include <stdexcept>
 namespace ext
 {
  /// Read a whole file (opened in binary mode) and return its content as a
  /// C++ string.
  /// \throw std::runtime_error with message "File not found: <fn>" when the
  ///        file cannot be opened
  inline
  std::string read_file(const std::string& fn)
  {
    std::ifstream input(fn.c_str(), std::ios::binary);
    if (!input.is_open())
      throw std::runtime_error("File not found: " + fn);
    typedef std::istreambuf_iterator<char> byte_iterator;
    std::string content;
    content.assign(byte_iterator(input), byte_iterator());
    return content;
  }
  /// Split a std::string on whitespace; empty tokens are never produced.
  inline
  std::vector<std::string> split(const std::string& str)
  {
    std::vector<std::string> tokens;
    std::istringstream iss(str);
    for (std::string token; iss >> token; )
      tokens.push_back(token);
    return tokens;
  }
  /// Split a std::string on a separator character.
  /// Empty pieces are kept, so the result always holds exactly one more
  /// element than there are separators in \p s.
  inline
  std::vector<std::string> split(const std::string& s, char seperator)
  {
    std::vector<std::string> pieces;
    std::string::size_type begin = 0;
    for (std::string::size_type hit = s.find(seperator, begin);
         hit != std::string::npos;
         hit = s.find(seperator, begin))
    {
      pieces.push_back(s.substr(begin, hit - begin));
      begin = hit + 1;
    }
    // Whatever follows the last separator (possibly nothing).
    pieces.push_back(s.substr(begin));
    return pieces;
  }
  /// Tell whether \p str ends with \p ending (an empty ending always
  /// matches).
  inline
  bool
  ends_with(const std::string& str, const std::string& ending)
  {
    const std::string::size_type tail = ending.length();
    if (str.length() < tail)
      return false;
    return str.compare(str.length() - tail, tail, ending) == 0;
  }
  /// Sorted unique: return the distinct elements of \p v in ascending order.
  /// Side effect: \p v itself is sorted and passed through std::unique (its
  /// size is unchanged).
  template <typename T>
  inline
  T s_unique(T& v)
  {
    std::sort(v.begin(), v.end());
    const typename T::iterator unique_end = std::unique(v.begin(), v.end());
    T result;
    result.resize(unique_end - v.begin());
    std::copy(v.begin(), unique_end, result.begin());
    return result;
  }
 } // end of ext
 #endif /* _STD_EXT_HH */
--- a/src/Functions/CMakeLists.txt
+++ b/src/Functions/CMakeLists.txt
@ -124,3 +124,4 @@ endif()
 set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-sanitize=signed-integer-overflow)
 target_link_libraries(clickhouse_functions PRIVATE stemmer)
 target_link_libraries(clickhouse_functions PRIVATE wnb)
--- a/src/Interpreters/SynonymsExtensions.cpp
+++ b/src/Interpreters/SynonymsExtensions.cpp
@ -1,10 +1,11 @@
 #include <Common/Exception.h>
-#include <Interpreters/SynonymsExtensions.h>
+#include <Functions/SynonymsExtensions.h>
 #include <fstream>
 #include <list>
 #include <boost/algorithm/string.hpp>
 #include <wnb/core/wordnet.hh>
 namespace DB
 {
@ -48,7 +49,7 @@ public:
        }
    }
-    Synset * getSynonyms(const std::string_view & token) const override
+    const Synset * getSynonyms(const std::string_view & token) const override
    {
        auto it = table.find(token);
@ -62,20 +63,23 @@ public:
 class WordnetSynonymsExtension : public ISynonymsExtension
 {
 private:
-    // std::vector<std::vector<String>> data;
+    wnb::wordnet wn;
 public:
-    WordnetSynonymsExtension(const String & /*path*/)
+    WordnetSynonymsExtension(const String & path) : wn(path) {}
    {
-    }
+    const Synset * getSynonyms(const std::string_view & token) const override
    Synset * getSynonyms(const std::string_view & /*token*/) const override
    {
-        return nullptr;
+        return wn.get_synset(std::string(token));
    }
 };
 /// Duplicate of code from StringUtils.h. Copied here for less dependencies.
 /// Returns true when `s` begins with the NUL-terminated `prefix`.
 static bool startsWith(const std::string & s, const char * prefix)
 {
    const size_t prefix_size = strlen(prefix);
    if (s.size() < prefix_size)
        return false;
    return 0 == memcmp(s.data(), prefix, prefix_size);
 }
 SynonymsExtensions::SynonymsExtensions(const Poco::Util::AbstractConfiguration & config)
 {
    String prefix = "synonyms_extensions";
@ -89,7 +93,7 @@ SynonymsExtensions::SynonymsExtensions(const Poco::Util::AbstractConfiguration &
    for (const auto & key : keys)
    {
-        if (key == "extension")
+        if (startsWith(key, "extension"))
        {
            const auto & ext_name = config.getString(prefix + "." + key + ".name", "");
            const auto & ext_path = config.getString(prefix + "." + key + ".path", "");
--- a/src/Interpreters/SynonymsExtensions.h
+++ b/src/Interpreters/SynonymsExtensions.h
@ -19,7 +19,7 @@ public:
    //ISynonymsExtension(const String & path);
-    virtual Synset * getSynonyms(const std::string_view & token) const = 0;
+    virtual const Synset * getSynonyms(const std::string_view & token) const = 0;
    virtual ~ISynonymsExtension() = default;
 };
--- a/src/Functions/synonyms.cpp
+++ b/src/Functions/synonyms.cpp
@ -6,7 +6,7 @@
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionHelpers.h>
 #include <Functions/IFunction.h>
-#include <Interpreters/SynonymsExtensions.h>
+#include <Functions/SynonymsExtensions.h>
 #include <Interpreters/Context.h>
 #include <string_view>
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@ -53,7 +53,6 @@
 #include <Interpreters/InterserverCredentials.h>
 #include <Interpreters/Cluster.h>
 #include <Interpreters/InterserverIOHandler.h>
 #include <Interpreters/SynonymsExtensions.h>
 #include <Interpreters/SystemLog.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/DDLWorker.h>
@ -75,7 +74,7 @@
 #include <Interpreters/DatabaseCatalog.h>
 #include <Storages/MergeTree/BackgroundJobsExecutor.h>
 #include <Storages/MergeTree/MergeTreeDataPartUUID.h>
-
+#include <Functions/SynonymsExtensions.h>
 namespace ProfileEvents
 {