Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
apollo
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
openSource
apollo
Commits
b3ec3531
Commit
b3ec3531
authored
Nov 08, 2016
by
lepdou
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactor env health check logs
parent
56e2d45e
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
35 deletions
+44
-35
PortalSettings.java
...ava/com/ctrip/framework/apollo/portal/PortalSettings.java
+20
-22
AdminServiceAddressLocator.java
...amework/apollo/portal/api/AdminServiceAddressLocator.java
+13
-6
RetryableRestTemplate.java
...ip/framework/apollo/portal/api/RetryableRestTemplate.java
+11
-7
No files found.
apollo-portal/src/main/java/com/ctrip/framework/apollo/portal/PortalSettings.java
View file @
b3ec3531
...
...
@@ -28,9 +28,9 @@ import javax.annotation.PostConstruct;
@Component
public
class
PortalSettings
{
private
Logger
logger
=
LoggerFactory
.
getLogger
(
PortalSettings
.
class
);
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
PortalSettings
.
class
);
private
static
final
int
HEALTH_CHECK_INTERVAL
=
10
*
1000
;
private
static
final
String
DEFAULT_SUPPORT_ENV_LIST
=
"FAT,UAT,PRO"
;
@Autowired
...
...
@@ -44,14 +44,10 @@ public class PortalSettings {
//mark env up or down
private
Map
<
Env
,
Boolean
>
envStatusMark
=
new
ConcurrentHashMap
<>();
private
ScheduledExecutorService
healthCheckService
;
@PostConstruct
private
void
postConstruct
()
{
//初始化portal支持操作的环境集合,线上的portal可能支持所有的环境操作,而线下环境则支持一部分.
// 每个环境的portal支持哪些环境配置在数据库里
String
serverConfig
=
serverConfigService
.
getValue
(
"apollo.portal.envs"
,
"FAT,UAT,PRO"
);
String
serverConfig
=
serverConfigService
.
getValue
(
"apollo.portal.envs"
,
DEFAULT_SUPPORT_ENV_LIST
);
String
[]
configedEnvs
=
serverConfig
.
split
(
","
);
List
<
String
>
allStrEnvs
=
Arrays
.
asList
(
configedEnvs
);
for
(
String
e
:
allStrEnvs
)
{
...
...
@@ -62,7 +58,7 @@ public class PortalSettings {
envStatusMark
.
put
(
env
,
true
);
}
healthCheckService
=
Executors
.
newScheduledThreadPool
(
1
);
ScheduledExecutorService
healthCheckService
=
Executors
.
newScheduledThreadPool
(
1
);
healthCheckService
.
scheduleWithFixedDelay
(
new
HealthCheckTask
(
applicationContext
),
1000
,
HEALTH_CHECK_INTERVAL
,
...
...
@@ -86,16 +82,16 @@ public class PortalSettings {
class
HealthCheckTask
implements
Runnable
{
private
static
final
int
ENV_DIED_THREADHOLD
=
2
;
private
static
final
int
ENV_DOWN_THRESHOLD
=
2
;
private
Map
<
Env
,
Long
>
healthCheckFailCnt
=
new
HashMap
<>();
private
Map
<
Env
,
Integer
>
healthCheckFailedCounter
=
new
HashMap
<>();
private
AdminServiceAPI
.
HealthAPI
healthAPI
;
public
HealthCheckTask
(
ApplicationContext
context
)
{
healthAPI
=
context
.
getBean
(
AdminServiceAPI
.
HealthAPI
.
class
);
for
(
Env
env
:
allEnvs
)
{
healthCheckFailCnt
.
put
(
env
,
0l
);
healthCheckFailedCounter
.
put
(
env
,
0
);
}
}
...
...
@@ -107,17 +103,17 @@ public class PortalSettings {
//revive
if
(!
envStatusMark
.
get
(
env
))
{
envStatusMark
.
put
(
env
,
true
);
healthCheckFailCnt
.
put
(
env
,
0l
);
logger
.
info
(
"env up again [env:{}]"
,
env
);
healthCheckFailedCounter
.
put
(
env
,
0
);
logger
.
info
(
"Env revived because env health check success. env: {}"
,
env
);
}
}
else
{
//maybe meta server up but admin server down
logger
.
warn
(
"Env health check failed, maybe because of admin server down. env: {}"
,
env
);
handleEnvDown
(
env
);
}
}
catch
(
Exception
e
)
{
//maybe meta server down
logger
.
warn
(
"health check fail. [env:{}]"
,
env
,
e
.
getMessage
()
);
logger
.
warn
(
"Env health check failed, maybe because of meta server down "
+
"or config error meta server address. env: {}"
,
env
);
handleEnvDown
(
env
);
}
}
...
...
@@ -130,17 +126,19 @@ public class PortalSettings {
}
private
void
handleEnvDown
(
Env
env
)
{
long
failCnt
=
healthCheckFailCnt
.
get
(
env
);
healthCheckFailCnt
.
put
(
env
,
++
failCnt
);
int
failedTimes
=
healthCheckFailedCounter
.
get
(
env
);
healthCheckFailedCounter
.
put
(
env
,
++
failedTimes
);
if
(!
envStatusMark
.
get
(
env
))
{
logger
.
warn
(
"[env:{}] down yet."
,
env
);
logger
.
error
(
"Env is down. env: {}, failed times: {}"
,
env
,
failedTimes
);
}
else
{
if
(
failCnt
>=
ENV_DIED_THREADHOLD
)
{
if
(
failedTimes
>=
ENV_DOWN_THRESHOLD
)
{
envStatusMark
.
put
(
env
,
false
);
logger
.
error
(
"env turn to down [env:{}]"
,
env
);
logger
.
error
(
"Env is down because health check failed for {} times, "
+
"which equals to down threshold. env: {}"
,
ENV_DOWN_THRESHOLD
,
env
);
}
else
{
logger
.
warn
(
"env health check fail first time. [env:{}]"
,
env
);
logger
.
warn
(
"Env health check failed for {} times which less than down threshold. down threshold:{}, env: {}"
,
failedTimes
,
ENV_DOWN_THRESHOLD
,
env
);
}
}
...
...
apollo-portal/src/main/java/com/ctrip/framework/apollo/portal/api/AdminServiceAddressLocator.java
View file @
b3ec3531
...
...
@@ -8,6 +8,8 @@ import com.ctrip.framework.apollo.core.enums.Env;
import
com.ctrip.framework.apollo.portal.PortalSettings
;
import
com.dianping.cat.Cat
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.boot.autoconfigure.web.HttpMessageConverters
;
import
org.springframework.http.client.HttpComponentsClientHttpRequestFactory
;
...
...
@@ -35,6 +37,7 @@ public class AdminServiceAddressLocator {
private
static
final
long
OFFLINE_REFRESH_INTERVAL
=
10
*
1000
;
private
static
final
int
RETRY_TIMES
=
3
;
private
static
final
String
ADMIN_SERVICE_URL_PATH
=
"/services/admin"
;
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
AdminServiceAddressLocator
.
class
);
private
ScheduledExecutorService
refreshServiceAddressService
;
private
RestTemplate
restTemplate
;
...
...
@@ -91,10 +94,12 @@ public class AdminServiceAddressLocator {
refreshSuccess
=
refreshSuccess
&&
currentEnvRefreshResult
;
}
if
(
refreshSuccess
){
refreshServiceAddressService
.
schedule
(
new
RefreshAdminServerAddressTask
(),
NORMAL_REFRESH_INTERVAL
,
TimeUnit
.
MILLISECONDS
);
if
(
refreshSuccess
)
{
refreshServiceAddressService
.
schedule
(
new
RefreshAdminServerAddressTask
(),
NORMAL_REFRESH_INTERVAL
,
TimeUnit
.
MILLISECONDS
);
}
else
{
refreshServiceAddressService
.
schedule
(
new
RefreshAdminServerAddressTask
(),
OFFLINE_REFRESH_INTERVAL
,
TimeUnit
.
MILLISECONDS
);
refreshServiceAddressService
.
schedule
(
new
RefreshAdminServerAddressTask
(),
OFFLINE_REFRESH_INTERVAL
,
TimeUnit
.
MILLISECONDS
);
}
}
}
...
...
@@ -110,9 +115,11 @@ public class AdminServiceAddressLocator {
}
cache
.
put
(
env
,
Arrays
.
asList
(
services
));
return
true
;
}
catch
(
Throwable
e
)
{
//meta server error
Cat
.
logError
(
"get admin server address fail"
,
e
);
continue
;
}
catch
(
Throwable
e
)
{
logger
.
error
(
String
.
format
(
"Get admin server address from meta server failed. env: %s, meta server address:%s"
,
env
,
MetaDomainConsts
.
getDomain
(
env
)),
e
);
Cat
.
logError
(
String
.
format
(
"Get admin server address from meta server failed. env: %s, meta server address:%s"
,
env
,
MetaDomainConsts
.
getDomain
(
env
)),
e
);
}
}
return
false
;
...
...
apollo-portal/src/main/java/com/ctrip/framework/apollo/portal/api/RetryableRestTemplate.java
View file @
b3ec3531
...
...
@@ -10,6 +10,8 @@ import com.dianping.cat.message.Transaction;
import
org.apache.http.conn.ConnectTimeoutException
;
import
org.apache.http.conn.HttpHostConnectException
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.core.ParameterizedTypeReference
;
import
org.springframework.http.HttpMethod
;
...
...
@@ -32,6 +34,8 @@ import javax.annotation.PostConstruct;
@Component
public
class
RetryableRestTemplate
{
private
Logger
logger
=
LoggerFactory
.
getLogger
(
RetryableRestTemplate
.
class
);
private
UriTemplateHandler
uriTemplateHandler
=
new
DefaultUriTemplateHandler
();
private
RestTemplate
restTemplate
;
...
...
@@ -56,7 +60,7 @@ public class RetryableRestTemplate {
Object
...
uriVariables
)
throws
RestClientException
{
return
execute
(
env
,
path
,
reference
,
uriVariables
);
return
exchangeGet
(
env
,
path
,
reference
,
uriVariables
);
}
public
<
T
>
T
post
(
Env
env
,
String
path
,
Object
request
,
Class
<
T
>
responseType
,
Object
...
uriVariables
)
...
...
@@ -93,10 +97,10 @@ public class RetryableRestTemplate {
ct
.
complete
();
return
result
;
}
catch
(
Throwable
t
)
{
logger
.
error
(
"Http request failed, uri: {}, method: {}"
,
uri
,
method
,
t
);
Cat
.
logError
(
t
);
if
(
canRetry
(
t
,
method
))
{
Cat
.
logEvent
(
CatEventType
.
API_RETRY
,
uri
);
continue
;
}
else
{
//biz exception rethrow
ct
.
setStatus
(
t
);
ct
.
complete
();
...
...
@@ -112,8 +116,8 @@ public class RetryableRestTemplate {
throw
e
;
}
private
<
T
>
ResponseEntity
<
T
>
execute
(
Env
env
,
String
path
,
ParameterizedTypeReference
<
T
>
reference
,
Object
...
uriVariables
)
{
private
<
T
>
ResponseEntity
<
T
>
exchangeGet
(
Env
env
,
String
path
,
ParameterizedTypeReference
<
T
>
reference
,
Object
...
uriVariables
)
{
if
(
path
.
startsWith
(
"/"
))
{
path
=
path
.
substring
(
1
,
path
.
length
());
}
...
...
@@ -133,9 +137,9 @@ public class RetryableRestTemplate {
ct
.
complete
();
return
result
;
}
catch
(
Throwable
t
)
{
logger
.
error
(
"Http request failed, uri: {}, method: {}"
,
uri
,
HttpMethod
.
GET
,
t
);
Cat
.
logError
(
t
);
Cat
.
logEvent
(
CatEventType
.
API_RETRY
,
uri
);
continue
;
}
}
...
...
@@ -147,7 +151,7 @@ public class RetryableRestTemplate {
}
private
List
<
ServiceDTO
>
getAdminServices
(
Env
env
,
Transaction
ct
)
{
private
List
<
ServiceDTO
>
getAdminServices
(
Env
env
,
Transaction
ct
)
{
List
<
ServiceDTO
>
services
=
adminServiceAddressLocator
.
getServiceList
(
env
);
...
...
@@ -180,7 +184,7 @@ public class RetryableRestTemplate {
restTemplate
.
delete
(
parseHost
(
service
)
+
path
,
uriVariables
);
break
;
default
:
throw
new
UnsupportedOperationException
(
String
.
format
(
"not supported http method(method=%s)"
,
method
));
throw
new
UnsupportedOperationException
(
String
.
format
(
"unsupported http method(method=%s)"
,
method
));
}
return
result
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment