Skip to content
Snippets Groups Projects
Commit eeedd889 authored by Will Sinatra, committed by Patrycja Rosa
Browse files

community/py3-nltk: upgrade to 3.9.1

parent 52e7d2c0
No related branches found
No related tags found
1 merge request: !75442 "community/py3-nltk: upgrade to 3.9.1"
Pipeline #274489 canceled
...@@ -3,17 +3,19 @@ ...@@ -3,17 +3,19 @@
maintainer="Will Sinatra <wpsinatra@gmail.com>" maintainer="Will Sinatra <wpsinatra@gmail.com>"
pkgname=py3-nltk pkgname=py3-nltk
_pkgorig=nltk _pkgorig=nltk
pkgver=3.8.2 pkgver=3.9.1
pkgrel=1 pkgrel=0
pkgdesc="Natural Language Toolkit" pkgdesc="Natural Language Toolkit"
url="https://github.com/nltk/nltk" url="https://github.com/nltk/nltk"
arch="noarch" arch="noarch"
license="Apache-2.0" license="Apache-2.0"
depends="python3 py3-yaml py3-click py3-regex" depends="python3 py3-yaml py3-click py3-regex"
makedepends="py3-gpep517 py3-setuptools py3-wheel" makedepends="py3-gpep517 py3-setuptools py3-wheel"
checkdepends="py3-nose py3-requests" checkdepends="py3-matplotlib py3-numpy py3-pytest py3-pytest-mock"
subpackages="$pkgname-pyc" subpackages="$pkgname-pyc"
source="$pkgname-$pkgver.tar.gz::https://github.com/nltk/nltk/archive/$pkgver.tar.gz" source="$pkgname-$pkgver.tar.gz::https://github.com/nltk/nltk/archive/$pkgver.tar.gz
fix-import-wordnet.patch
"
builddir="$srcdir/$_pkgorig-$pkgver" builddir="$srcdir/$_pkgorig-$pkgver"
build() { build() {
...@@ -22,9 +24,11 @@ build() { ...@@ -22,9 +24,11 @@ build() {
--output-fd 3 3>&1 >&2 --output-fd 3 3>&1 >&2
} }
# tests require nltk-data; use smoke test for now
check() { check() {
PYTHONPATH=build/lib python3 -c "from nltk import *" python3 -m venv --clear --without-pip --system-site-packages .testenv
.testenv/bin/python3 -m installer .dist/*.whl
.testenv/bin/python3 -m nltk.downloader all
.testenv/bin/python3 -m pytest
} }
package() { package() {
...@@ -35,5 +39,6 @@ package() { ...@@ -35,5 +39,6 @@ package() {
} }
sha512sums=" sha512sums="
cdd76d1185aa7a0c4e96af19e311120ac70f425e75cd52e37d20d32f2722490c4e74535c44113da5856fb682530ea598446f998fda4e186c898cd675064dee6b py3-nltk-3.8.2.tar.gz 7e384a23fa3839c29d38c083d6288d15ff04eb071c4bca3ec1bfb3b9cfcac8d2388041a950d6c71684ee29b791318312df99e29ae258eb5e602ccf2ad1407840 py3-nltk-3.9.1.tar.gz
9c4491d6ebdaad056b7bd7a3ebe7c720ad47568181aec2a1801b198e1ed81a9174ec2811227d37a336fe689f05afb5ac983fe0d05b3a379aff9bf4cfa87aba69 fix-import-wordnet.patch
" "
Patch-Source: https://github.com/nltk/nltk/pull/3309
--
From 7d1cbc71441f607daea0894333912268e2911cab Mon Sep 17 00:00:00 2001
From: Eric Kafe <kafe.eric@gmail.com>
Date: Sun, 18 Aug 2024 16:09:01 +0200
Subject: [PATCH] Fix bug in WordNetLemmatizer
---
nltk/stem/wordnet.py | 71 ++++++++++++++++++++++++--------------------
1 file changed, 39 insertions(+), 32 deletions(-)
diff --git a/nltk/stem/wordnet.py b/nltk/stem/wordnet.py
index 76caf1b09c..87d08c7891 100644
--- a/nltk/stem/wordnet.py
+++ b/nltk/stem/wordnet.py
@@ -7,64 +7,71 @@
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
-from nltk.corpus import wordnet as wn
-
class WordNetLemmatizer:
"""
WordNet Lemmatizer
- Provides 3 lemmatizer modes:
-
- 1. _morphy() is an alias to WordNet's _morphy lemmatizer.
- It returns a list of all lemmas found in WordNet.
-
- >>> wnl = WordNetLemmatizer()
- >>> print(wnl._morphy('us', 'n'))
- ['us', 'u']
-
- 2. morphy() is a restrictive wrapper around _morphy().
- It returns the first lemma found in WordNet,
- or None if no lemma is found.
+ Provides 3 lemmatizer modes: _morphy(), morphy() and lemmatize().
- >>> print(wnl.morphy('us', 'n'))
- us
-
- >>> print(wnl.morphy('catss'))
- None
-
- 3. lemmatize() is a permissive wrapper around _morphy().
+ lemmatize() is a permissive wrapper around _morphy().
It returns the shortest lemma found in WordNet,
or the input string unchanged if nothing is found.
- >>> print(wnl.lemmatize('us', 'n'))
+ >>> from nltk.stem import WordNetLemmatizer as wnl
+ >>> print(wnl().lemmatize('us', 'n'))
u
- >>> print(wnl.lemmatize('Anythinggoeszxcv'))
+ >>> print(wnl().lemmatize('Anythinggoeszxcv'))
Anythinggoeszxcv
"""
- morphy = wn.morphy
+ def _morphy(self, form, pos, check_exceptions=True):
+ """
+ _morphy() is WordNet's _morphy lemmatizer.
+ It returns a list of all lemmas found in WordNet.
+
+ >>> from nltk.stem import WordNetLemmatizer as wnl
+ >>> print(wnl()._morphy('us', 'n'))
+ ['us', 'u']
+ """
+ from nltk.corpus import wordnet as wn
+
+ return wn._morphy(form, pos, check_exceptions)
+
+ def morphy(self, form, pos=None, check_exceptions=True):
+ """
+ morphy() is a restrictive wrapper around _morphy().
+ It returns the first lemma found in WordNet,
+ or None if no lemma is found.
+
+ >>> from nltk.stem import WordNetLemmatizer as wnl
+ >>> print(wnl().morphy('us', 'n'))
+ us
+
+ >>> print(wnl().morphy('catss'))
+ None
+ """
+ from nltk.corpus import wordnet as wn
- _morphy = wn._morphy
+ return wn.morphy(form, pos, check_exceptions)
def lemmatize(self, word: str, pos: str = "n") -> str:
"""Lemmatize `word` by picking the shortest of the possible lemmas,
using the wordnet corpus reader's built-in _morphy function.
Returns the input word unchanged if it cannot be found in WordNet.
- >>> from nltk.stem import WordNetLemmatizer
- >>> wnl = WordNetLemmatizer()
- >>> print(wnl.lemmatize('dogs'))
+ >>> from nltk.stem import WordNetLemmatizer as wnl
+ >>> print(wnl().lemmatize('dogs'))
dog
- >>> print(wnl.lemmatize('churches'))
+ >>> print(wnl().lemmatize('churches'))
church
- >>> print(wnl.lemmatize('aardwolves'))
+ >>> print(wnl().lemmatize('aardwolves'))
aardwolf
- >>> print(wnl.lemmatize('abaci'))
+ >>> print(wnl().lemmatize('abaci'))
abacus
- >>> print(wnl.lemmatize('hardrock'))
+ >>> print(wnl().lemmatize('hardrock'))
hardrock
:param word: The input word to lemmatize.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment