Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 21c6b2b

Browse files
committed
added basic CoreNLP support
1 parent 687a0d1 commit 21c6b2b

File tree

3 files changed

+127
-18
lines changed

3 files changed

+127
-18
lines changed

‎src/Classes/EntityClean.php

Lines changed: 0 additions & 18 deletions
This file was deleted.

‎src/CoreNlp.php

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
<?php
2+
3+
namespace Web64\Nlp;
4+
5+
/**
 * Minimal HTTP client for a Stanford CoreNLP server.
 *
 * Server setup:
 *   wget http://nlp.stanford.edu/software/stanford-corenlp-full-2018-02-27.zip
 *   unzip stanford-corenlp-full-2018-02-27.zip
 *   cd stanford-corenlp-full-2018-02-27
 *   java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000
 */
class CoreNlp
{
    /** @var string Base URL of the CoreNLP server; the properties query string is appended to it. */
    public $api_url = 'http://homestead:9000/';

    /** @var array Properties JSON-encoded into the `properties` query parameter of each request. */
    public $properties = [];

    /** @var mixed Decoded response of the most recent call made through entities(), or null on failure. */
    public $data;

    /**
     * @var bool When true, post_call() echoes the request URL and writes the raw
     *           response body to "corenlp.json" in the current working directory.
     */
    public $debug = false;

    /**
     * Runs the `ner` annotator over the text and groups the distinct entity
     * mentions by their NER label.
     *
     * Overwrites $this->properties and $this->data as a side effect.
     *
     * @param string $text Text to annotate.
     *
     * @return array|null Map of NER label => list of unique mention texts, in
     *                    order of first appearance; null when the response
     *                    contains no sentences (including a failed request).
     */
    public function entities( $text )
    {
        $this->properties = [
            'annotators'   => 'ner',
            'outputFormat' => 'json',
        ];

        $this->data = $this->post_call( $text );

        if ( !is_array( $this->data )
            || empty( $this->data['sentences'] )
            || !is_array( $this->data['sentences'] ) )
        {
            return null;
        }

        $entities = [];

        foreach ( $this->data['sentences'] as $sentence )
        {
            if ( !is_array( $sentence )
                || empty( $sentence['entitymentions'] )
                || !is_array( $sentence['entitymentions'] ) )
            {
                continue;
            }

            foreach ( $sentence['entitymentions'] as $entity )
            {
                // Skip malformed mentions instead of raising undefined-index notices.
                if ( !isset( $entity['ner'], $entity['text'] ) )
                    continue;

                $label = $entity['ner'];

                if ( !isset( $entities[ $label ] ) )
                    $entities[ $label ] = [];

                // Strict comparison: a loose match would merge numeric-looking
                // mentions such as "10" and "10.0" into one entry.
                if ( !in_array( $entity['text'], $entities[ $label ], true ) )
                    $entities[ $label ][] = $entity['text'];
            }
        }

        return $entities;
    }

    /**
     * POSTs the text to the CoreNLP server using the current $this->properties
     * and returns the decoded JSON response.
     *
     * NOTE(review): the body is sent un-encoded under a form-urlencoded
     * content type; confirm how the server treats "+" and "%" in the text.
     *
     * @param string $text Raw request body.
     *
     * @return mixed Decoded JSON (objects as associative arrays), or null when
     *               the request fails or the body is not valid JSON.
     */
    public function post_call( $text )
    {
        $opts = [
            'http' => [
                'method'  => 'POST',
                'header'  => 'Content-type: application/x-www-form-urlencoded',
                'content' => $text,
            ],
        ];

        $url = $this->api_url . "?properties=" . urlencode( json_encode( $this->properties ) );

        if ( $this->debug )
            echo "URL: {$url}\n\n";

        $context = stream_context_create( $opts );

        // Warnings are suppressed on purpose; failure is reported through the
        // explicit false check below rather than a PHP warning.
        $result = @file_get_contents( $url, false, $context );

        if ( $result === false )
            return null;

        if ( $this->debug )
            file_put_contents( "corenlp.json", $result );

        return json_decode( $result, true );
    }
}

‎tests/Unit/CoreNlpTest.php

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<?php
2+
3+
namespace Tests\Unit;
4+
5+
use Tests\TestCase;
6+
7+
/**
 * Integration test for \Web64\Nlp\CoreNlp.
 *
 * Calls entities() on a default-constructed client, so it needs a CoreNLP
 * server reachable at that client's default $api_url.
 */
class CoreNlpTest extends TestCase
{
    /**
     * A news article mentioning Germany and Turkey must yield at least one
     * COUNTRY entity.
     *
     * @test
     */
    public function test_core_nlp()
    {
        $corenlp = new \Web64\Nlp\CoreNlp();

        $text = "German Foreign Ministry investigating arrest of German reporter in Turkey.
Germany's Foreign Ministry has said it is investigating claims that a German journalist has been detained in Turkey. Adil Demirci is believed to have been arrested while on vacation in Istanbul.
The German Foreign Ministry said on Friday that it was responding to reports that Adil Demirci, a journalist for the left-leaning Turkish news agency Etha, had been detained in Istanbul, although the reasons for his arrest remained unclear.
The detention of another journalist after the release of Deniz Yücel would be another obstacle in already frayed German-Turkish relations.
What we know about Adil Demirci's arrest so far

Reports suggested that Demirci was one of three Etha reporters detained during a series of police raids in Istanbul overnight on Thursday.
The German Foreign Ministry said it was \"working on the basis that Adil Demirci has been arrested,\" but admitted Turkish authorities had yet to confirm such reports.
Demirci reportedly worked as a Germany correspondent for Etha and lived in the city of Cologn.
News of Demirci's arrest was first reported by Mesale Tolu, a colleague of his at Etha. Tolu herself was one of several German nationals arrested in Turkey year last year for political reasons. Tolu tweeted on Friday morning that Demirci was detained with two colleagues, Pınar Gayip and Semiha Sahin, during a raid. Demirci, who reportedly holds both German and Turkish citizenship, was in Istanbul on holiday, Tolu said. Reports suggested he was scheduled to fly back to Germany on Saturday.
";

        $entities = $corenlp->entities( $text );

        // entities() returns null when the server is unreachable or returns
        // no sentences; fail with a clear message instead of indexing null.
        $this->assertNotNull( $entities, 'CoreNLP returned no sentences - is the server running?' );
        $this->assertArrayHasKey( 'COUNTRY', $entities );
        $this->assertNotEmpty( $entities['COUNTRY'] );
    }
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /